# Imports assumed by this excerpt (the standard `multiprocessing` module would work equally well):
import random
from collections import deque
from queue import Empty

from torch import LongTensor
from torch.multiprocessing import Process, Queue


class DataLoaderMultiFiles(object):
    """DataLoader to iterate over a set of DataSets."""

    def __init__(self, filepaths, partial, batch_s, buffer_s):
        self.filepaths = filepaths
        self.partial = partial
        self.batch_size = batch_s
        self.max_len = buffer_s
        self.buffer = Queue(maxsize=buffer_s)
        self.batch_queue = Queue(maxsize=10)

    def __iter__(self):
        print('Starting processes')
        random.seed(0)
        random.shuffle(self.filepaths)
        filepaths = deque(self.filepaths)  # note: built here but never consumed
        self.buffr_processes = []
        args = (self.filepaths, self.buffer, self.partial)
        for i in range(10):
            process = Process(target=fill_buffer, args=args)
            process.daemon = True
            process.start()
            self.buffr_processes.append(process)
        args = (self.buffer, self.batch_queue, self.batch_size)
        self.batch_process = Process(target=fill_batch, args=args)
        self.batch_process.daemon = True
        self.batch_process.start()
        return self

    def done_files(self):
        # Despite the name, this returns the number of buffer processes still alive.
        return sum(e.is_alive() for e in self.buffr_processes)

    def __next__(self):
        # Wait longer while buffer-filling processes are still producing data.
        timeout = 1 if self.done_files() == 0 else 60
        try:
            batch = self.batch_queue.get(timeout=timeout)
        except Empty:
            self.kill()
            raise StopIteration
        return LongTensor(batch)

    def kill(self):
        print('Killing processes')
        for process in self.buffr_processes:  # terminate every buffer-filling worker
            process.terminate()
        self.batch_process.terminate()

    def __del__(self):
        self.kill()
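The loader above hands its workers two module-level helpers, `fill_buffer` and `fill_batch`, which are not part of this excerpt. A minimal sketch of what such workers could look like, under the assumption that `partial` is a callable yielding examples for a given file path:

def fill_buffer(filepaths, buffer, partial):
    """Push examples read from each file onto the shared buffer queue."""
    for path in filepaths:
        for example in partial(path):   # assumption: `partial` yields examples per file
            buffer.put(example)


def fill_batch(buffer, batch_queue, batch_size):
    """Group buffered examples into fixed-size batches for the consumer."""
    batch = []
    while True:
        batch.append(buffer.get())
        if len(batch) == batch_size:
            batch_queue.put(batch)
            batch = []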
def spawn_process(config, gpu_id=None, port=23456, **kwargs):
    processes = []
    if gpu_id is None:
        gpu_id = [0]
    try:
        for rank, gpu_id_val in enumerate(gpu_id):
            p = Process(target=main,
                        args=(config, rank, len(gpu_id), gpu_id_val, port, kwargs))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
    except KeyboardInterrupt:
        for p in processes:
            p.terminate()
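The `main` target is not shown in this snippet. A sketch of the kind of per-rank entry point such a launcher typically drives, matching the argument order used above (the backend and address are assumptions, not taken from the source):

import torch
import torch.distributed as dist


def main(config, rank, world_size, gpu_id, port, kwargs):
    # One process per GPU: join the process group, pin the device, then train.
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:{}'.format(port),
                            rank=rank,
                            world_size=world_size)
    torch.cuda.set_device(gpu_id)
    # ... build the model from `config` and run the training loop for this rank ...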
def _launch_procs(self, num_procs):
    mp.set_start_method('forkserver', force=True)
    skip_msg = mp.Queue()  # Allows forked processes to share pytest.skip reason
    processes = []
    for local_rank in range(num_procs):
        p = Process(target=self._dist_init, args=(local_rank, num_procs, skip_msg))
        p.start()
        processes.append(p)

    # Now loop and wait for a test to complete. The spin-wait here isn't a big
    # deal because the number of processes will be O(#GPUs) << O(#CPUs).
    any_done = False
    while not any_done:
        for p in processes:
            if not p.is_alive():
                any_done = True
                break

    # Wait for all other processes to complete
    for p in processes:
        p.join(DEEPSPEED_UNIT_WORKER_TIMEOUT)

    failed = [(rank, p) for rank, p in enumerate(processes) if p.exitcode != 0]
    for rank, p in failed:
        # If it still hasn't terminated, kill it because it hung.
        if p.exitcode is None:
            p.terminate()
            pytest.fail(f'Worker {rank} hung.', pytrace=False)
        if p.exitcode < 0:
            pytest.fail(f'Worker {rank} killed by signal {-p.exitcode}', pytrace=False)
        if p.exitcode > 0:
            pytest.fail(f'Worker {rank} exited with code {p.exitcode}', pytrace=False)

    if not skip_msg.empty():
        # This assumed all skip messages are the same, it may be useful to
        # add a check here to assert all exit messages are equal
        pytest.skip(skip_msg.get())
class DataLoaderMultiFiles(object):
    """DataLoader to iterate over a set of DataSets."""

    def __init__(self, dataset, batch_s):
        self.dataset = dataset
        self.batch_size = batch_s
        self.index_queue = deque(torch.randperm(len(self.dataset)).tolist())
        self.batch_queue = Queue(maxsize=5)

    def __iter__(self):
        print('new iteration of dataloader')
        args = (self.batch_queue, self.index_queue, self.dataset, self.batch_size)
        self.batch_process = Process(target=fill_batch, args=args)
        self.batch_process.daemon = True
        self.batch_process.start()
        return self

    def is_alive(self):
        return self.batch_process.is_alive()

    def __next__(self):
        # Give the worker plenty of time while it is still producing batches.
        timeout = 600 if self.is_alive() else 1
        try:
            batch = self.batch_queue.get(timeout=timeout)
        except Empty:
            print('empty')
            self.kill()
            raise StopIteration
        return LongTensor(batch)

    def kill(self):
        print('Killing processes')
        self.batch_process.terminate()

    def __del__(self):
        self.kill()
def dist_launcher(num_procs, *func_args, **func_kwargs):
    """Launch processes and gracefully handle failures."""
    # Spawn all workers on subprocesses.
    processes = []
    for local_rank in range(num_procs):
        p = Process(
            target=dist_init,
            args=(local_rank, num_procs, *func_args),
            kwargs=func_kwargs,
        )
        p.start()
        processes.append(p)

    # Now loop and wait for a test to complete. The spin-wait here isn't a big
    # deal because the number of processes will be O(#GPUs) << O(#CPUs).
    any_done = False
    while not any_done:
        for p in processes:
            if not p.is_alive():
                any_done = True
                break

    # Wait for all other processes to complete
    for p in processes:
        p.join(DEEPSPEED_UNIT_WORKER_TIMEOUT)

    failed = [(rank, p) for rank, p in enumerate(processes) if p.exitcode != 0]
    for rank, p in failed:
        # If it still hasn't terminated, kill it because it hung.
        if p.exitcode is None:
            p.terminate()
            pytest.fail(f"Worker {rank} hung.", pytrace=False)
        if p.exitcode < 0:
            pytest.fail(f"Worker {rank} killed by signal {-p.exitcode}",
                        pytrace=False)
        if p.exitcode > 0:
            pytest.fail(f"Worker {rank} exited with code {p.exitcode}",
                        pytrace=False)
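Both launchers classify failed workers through `Process.exitcode`. A small, self-contained demonstration of the convention they rely on (POSIX only; not taken from the source):

import os
import signal
import time
from multiprocessing import Process


def _worker(mode):
    if mode == 'fail':
        raise SystemExit(3)
    time.sleep(60)  # simulate a hang


if __name__ == '__main__':
    p = Process(target=_worker, args=('fail',))
    p.start()
    p.join()
    assert p.exitcode == 3                   # non-zero exit code -> worker failed

    p = Process(target=_worker, args=('hang',))
    p.start()
    time.sleep(0.2)
    assert p.exitcode is None                # still alive -> no exit code yet
    os.kill(p.pid, signal.SIGKILL)
    p.join()
    assert p.exitcode == -signal.SIGKILL     # killed by a signal -> negative code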
class Rotenc(object):
    """
    Rotation Encoder: reads headings in 0.0-360.0 degrees from a continuous stream like
        ...
        b'328.37\r\n'
        b'327.87\r\n'
        ...
    over a serial port: serial.Serial('/dev/ttyACM0', 192000)
    """

    def __init__(self, dev='/dev/ttyACM0', baudrate=192000):
        self.dev = dev
        try:
            self.ser = serial.Serial(dev, baudrate)
            self.is_connected = True
        except Exception:
            print('cannot find serial port at {} to read rotation degree'.format(self.dev))
            self.is_connected = False
        # Latest heading, shared with other processes via torch shared memory.
        self.direction = torch.empty(1,)
        self.direction.share_memory_()
        self.direction.fill_(0.0)

    def _rotenc_process(self):
        while self.is_connected:
            self.direction.fill_(float(self.ser.readline().decode("utf-8")))
            print(self.direction)

    def start(self):
        self.rotenc_process = Process(target=self._rotenc_process)
        self.rotenc_process.daemon = True
        self.rotenc_process.start()

    def stop(self):
        self.rotenc_process.terminate()
        self.rotenc_process.join()
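Because `direction` lives in shared memory, the parent process can poll the latest heading without a queue. A minimal usage sketch (device path and timings are placeholders):

import time

if __name__ == '__main__':
    rot = Rotenc('/dev/ttyACM0')
    rot.start()
    try:
        for _ in range(10):
            time.sleep(0.5)
            print('current heading: {:.2f} deg'.format(rot.direction.item()))
    finally:
        rot.stop()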
class Base(object): def __init__(self): self.epoch = 0 self.iteration = 0 self.offset = 0 # for multiprocessing self._epoch = 0 # Setting for multiprocessing self.preloading_process = None self.queue = Queue() self.queue_size = 0 def count_vocab_size(self, dict_path): vocab_count = 1 # for <blank> with codecs.open(dict_path, 'r', 'utf-8') as f: for line in f: if line.strip() != '': vocab_count += 1 return vocab_count def __len__(self): return len(self.df) def __getitem__(self, index): raise NotImplementedError() def __iter__(self): """Returns self.""" return self @property def epoch_detail(self): # Floating point version of epoch return self.epoch + (self.offset / len(self)) def __next__(self, batch_size=None): """Generate each mini-batch. Args: batch_size (int): the size of mini-batch Returns: batch (tuple): is_new_epoch (bool): If true, 1 epoch is finished """ if batch_size is None: batch_size = self.batch_size if self.n_ques is None: if self.max_epoch is not None and self.epoch >= self.max_epoch: raise StopIteration() # NOTE: max_epoch == None means infinite loop data_indices, is_new_epoch = self.sample_index(batch_size) batch = self.make_batch(data_indices) self.iteration += len(data_indices) else: # Clean up multiprocessing if self.preloading_process is not None and self.queue_size == 0: self.preloading_process.terminate() self.preloading_process.join() if self.max_epoch is not None and self.epoch >= self.max_epoch: # Clean up multiprocessing self.preloading_process.terminate() self.preloading_process.join() raise StopIteration() # NOTE: max_epoch == None means infinite loop # Enqueue mini-batches if self.queue_size == 0: self.df_indices_list = [] self.is_new_epoch_list = [] for _ in six.moves.range(self.n_ques): data_indices, is_new_epoch = self.sample_index(batch_size) self.df_indices_list.append(data_indices) self.is_new_epoch_list.append(is_new_epoch) self.preloading_process = Process(self.preloading_loop, args=(self.queue, self.df_indices_list)) self.preloading_process.start() self.queue_size += self.n_ques time.sleep(3) # print(self.queue.qsize()) # print(self.queue_size) self.iteration += len(self.df_indices_list[self.n_ques - self.queue_size]) self.queue_size -= 1 batch = self.queue.get() is_new_epoch = self.is_new_epoch_list.pop(0) if is_new_epoch: self.epoch += 1 return batch, is_new_epoch def next(self, batch_size=None): # For python2 return self.__next__(batch_size) def sample_index(self, batch_size): """Sample data indices of mini-batch. 
Args: batch_size (int): the size of mini-batch Returns: data_indices (np.ndarray): is_new_epoch (bool): """ is_new_epoch = False if self.sort_by_input_length or not self.shuffle: if self.sort_by_input_length: # Change batch size dynamically min_n_frames_batch = self.df[self.offset:self.offset + 1]['xlen'].values[0] batch_size_tmp = self.select_batch_size( batch_size, min_n_frames_batch) else: batch_size_tmp = batch_size if len(self.rest) > batch_size_tmp: data_indices = list(self.df[self.offset:self.offset + batch_size_tmp].index) self.rest -= set(data_indices) # NOTE: rest is in uttrance length order when sort_by_input_length == True # NOTE: otherwise in name length order when shuffle == False self.offset += len(data_indices) else: # Last mini-batch data_indices = list(self.df[self.offset:self.offset + len(self.rest)].index) self._reset() is_new_epoch = True self._epoch += 1 if self._epoch == self.sort_stop_epoch: self.sort_by_input_length = False self.shuffle = True # Sort in the descending order for pytorch data_indices = data_indices[::-1] else: # Randomly sample uttrances if len(self.rest) > batch_size: data_indices = random.sample(list(self.rest), batch_size) self.rest -= set(data_indices) else: # Last mini-batch data_indices = list(self.rest) self._reset() is_new_epoch = True self._epoch += 1 self.offset += len(data_indices) return data_indices, is_new_epoch def select_batch_size(self, batch_size, min_n_frames_batch): if not self.dynamic_batching: return batch_size if min_n_frames_batch <= 800: pass elif min_n_frames_batch <= 1600: batch_size = int(batch_size / 2) else: batch_size = int(batch_size / 4) if batch_size < 1: batch_size = 1 return batch_size def reset(self): self._reset() self.queue = Queue() self.queue_size = 0 # Clean up multiprocessing if self.preloading_process is not None: self.preloading_process.terminate() self.preloading_process.join() def _reset(self): """Reset data counter and offset.""" self.rest = set(list(self.df.index)) self.offset = 0 def preloading_loop(self, queue, df_indices_list): """. Args: queue (): df_indices_list (np.ndarray): """ # print("Pre-loading started.") for i in six.moves.range(len(df_indices_list)): queue.put(self.make_batch(df_indices_list[i]))
class Fpga(object): """docstring for FPGA""" def __init__(self, prb): self.prb = prb # self.group_idx = np.array(self.prb.grp_dict.keys()) self.reset() # self.load_vq() def close(self): self.r32.close() def reset(self): self.r32 = io.open('/dev/xillybus_fet_clf_32', 'rb') # self.r32_buf = io.BufferedReader(r32) self.fd = os.open("./fet.bin", os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK) self._size = 7*4 # 6 samples, 4 bytes/sample self.shared_mem_init() self.labels = {} self.spk_times = {} for grp_id in self.prb.grp_dict.keys(): self.labels[grp_id] = np.array([]) self.spk_times[grp_id] = np.array([]) # Create a TCP/IP socket # self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # self.server_address = ('localhost', 10000) # print >> sys.stderr, 'starting up on %s port %s' % self.server_address # self.sock.bind(self.server_address) # # Listen for incoming connections # self.sock.listen(1) # print >> sys.stderr, 'waiting for a connection' # self.connection, self.client_address = self.sock.accept() # print >> sys.stderr, 'connection from', self.client_address def shared_mem_init(self): n_spike_count_vector = len(self.prb.grp_dict.keys()) # trigger task using frame counter self.spike_count_vector = torch.zeros(n_spike_count_vector,) self.spike_count_vector.share_memory_() def load_vq(self, vq_file='vq.npy'): self.vq = np.load(vq_file).item(0) self.vq_grp_idx = self.vq['labels'].keys() self.log.info('vq from group:{} loaded'.format(self.vq_grp_idx)) def nnid_2_label(self, group_id, nn_id): return self.vq['labels'][group_id][nn_id] def _fpga_process(self): ''' A daemon process dedicated on reading data from PCIE and update the shared memory with other processors: shared_arr ''' tic = time.time() * 1000 while True: # with shared_arr.get_lock(): # tic = time.time() * 1000 # buf = r32_buf.read(_size) buf = self.r32.read(self._size) # f.write(buf) os.write(self.fd, buf) toc = time.time() * 1000 # self.log.info('{} elapsed'.format(toc-tic)) fet = np.frombuffer(buf,dtype=np.int32).reshape(-1,7) for _fet in fet: spk_time = _fet[0]/25. #ms group_id = _fet[1] if group_id in self.vq_grp_idx: # only calculate for vq group # self.log.info(self.vq['labels'][group_id]) # self.log.info(_fet) _nnid = _fet[-1] _label = self.nnid_2_label(group_id, _nnid) # self.spk_times[group_id] = np.append(self.spk_times[group_id], spk_time) # self.labels[group_id] = np.append(self.labels[group_id], _label) if _label != 0 and _label != 1: self.spike_count_vector[group_id] += 1 # self.log.info('{}'.format(self.spike_count_vector.numpy())) # for group_id in fet_info[:,1]: # if group_id in self.group_idx: # self.spk_times[group_id].append(fet_info[:, 0]) # _label = self.vq['labels'][group_id][fet[group_id]] # self.label[group_id].append(fet_info[:, -1]) # self.spike_count_vector[group_id] += 1 # self.log.info('{}'.format(self.spike_count_vector.numpy())) # _unique, _counts = np.unique(fet_info[:,1], return_counts=True) # for i in _unique: # if i in self.group_idx: # self.log.info('{}: {}'.format(i, _counts[i])) # self.spike_count_vector[self.prb[i]] += torch.tensor(_counts[i]) # self.log.info('{}'.format(fet_info[0])) def start(self): self.fpga_process = Process(target=self._fpga_process, name='fpga') #, args=(self.pipe_jovian_side,) self.fpga_process.daemon = True self.fpga_process.start() def stop(self): self.fpga_process.terminate() self.fpga_process.join()
class BMI(object): """ BMI: https://github.com/chongxi/spiketag/issues/58 1. receive bmi output from FPGA through a pcie channel, save to a file 2. parse the bmi output (timestamp, group_id, fet[:4], spike_id) 3. send the output to the binner, which will emit event to trigger decoder each time a new bin is completely filled 4. put the output into the queue for gui to display A) configure mode: >>> bmi = BMI(prb) in this case, `bmi.fpga` is used to configure FPGA model parameters B) real-time spike inference mode: >>> bmi = BMI(prb, fetfile) in this case, not only `bmi.fpga` can be used to configure FPGA, but also these parameters should be read out to configure higher-level containers such as a BMI GUI C) Additional to the spike inference, the inferred spikes can be fed into `binner` and then to a decoder >>> bmi.set_binner(bin_size, B_bins) >>> bmi.set_decoder(dec, dec_result_file='./decoded_pos.bin') D) Start bmi with or without a `gui_queue` for visualization >>> bmi.start(gui_queue=True) >>> bmi.stop() E) Read out the content in the bmi.gui_queue for visualization >>> bmi.gui_queue.get() """ def __init__(self, prb=None, fetfile=None): if prb is not None: self.prb = prb self.ngrp = prb.n_group # self.group_idx = np.array(list(self.prb.grp_dict.keys())) self.fpga = xike_config(self.prb) else: self.ngrp = 40 # by default self.fpga = xike_config() # by default print('{} groups on probe'.format(self.ngrp)) print('{} groups is configured in the FPGA: {}'.format(len(self.fpga.configured_groups), self.fpga.configured_groups)) print('{} neurons are configured in the FPGA'.format(self.fpga.n_units+1)) print('---1. BMI spike-model initiation succeed---\n') if fetfile is not None: self.init_bmi_packet_channel() self.fetfile = fetfile self.fd = os.open(self.fetfile, os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK) print('spike-id and feature is saved to {}\n'.format(self.fetfile)) self.binner = None def close(self): self.r32.close() def init_bmi_packet_channel(self): self.r32 = io.open('/dev/xillybus_fet_clf_32', 'rb') self._size = 7*4 # 7 samples, 4 bytes/sample self.bmi_buf = None print('spike-id packet channel is opened\n') def set_binner(self, bin_size, B_bins): ''' set bin size, N neurons and B bins for the binner ''' N_units = self.fpga.n_units + 1 self.binner = Binner(bin_size, N_units, B_bins) # binner initialization (space and time) print('BMI binner: {} bins {} units, each bin is {} seconds'.format(B_bins, N_units, bin_size)) print('---2. 
BMI binner initiation succeed---\n') # @self.binner.connect # def on_decode(X): # # print(self.binner.nbins, np.sum(self.binner.output), self.binner.count_vec.shape) # print(self.binner.nbins, self.binner.count_vec.shape, X.shape, np.sum(X)) # def shared_mem_init(self): # n_spike_count_vector = len(self.prb.grp_dict.keys()) # # trigger task using frame counter # self.spike_count_vector = torch.zeros(n_spike_count_vector,) # self.spike_count_vector.share_memory_() def set_decoder(self, dec, dec_result_file=None): print('------------------------------------------------------------------------') print('---Set the decoder `t_window` and `t_step` according to the bmi.binner---\r\n') self.dec = dec self.dec.resample(t_step=self.binner.bin_size, t_window=self.binner.bin_size*self.binner.B) print('--- Training decoder --- \r\n') self.dec.partition(training_range=[0.0, 1.0], valid_range=[0.5, 0.6], testing_range=[0.0, 1.0]) score = self.dec.auto_pipeline(smooth_sec=2) # 2 seconds smooth for scoring if dec_result_file is not None: self.dec_result = os.open(dec_result_file, os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK) print('------------------------------------------------------------------------') ### key code (move this part anywhere needed, e.g. connect to playground) # print('connecting decoder to the bmi for real-time control') # @self.binner.connect # def on_decode(X): # # print(self.binner.nbins, self.binner.count_vec.shape, X.shape, np.sum(X)) # with Timer('decoding', verbose=True): # if dec.name == 'NaiveBayes': # X = np.sum(X, axis=0) # y = self.dec.predict(X) # print('pos:{0}, time:{1:.5f} secs'.format(y, self.binner.current_time)) # os.write(self.dec_result, np.hstack((self.binner.last_bin, y))) print('---3. BMI Decoder initiation succeed---\n') def read_bmi(self): ''' take buf from pcie channel '/dev/xillybus_fet_clf_32' filter the output with defined rules according to timestamp and grp_id each bmi_output is a compressed spike: (timestamp, grp_id, fet0, fet1, fet2, fet3, spk_id) ''' # filled = False # while not filled: self.buf = self.r32.read(self._size) os.write(self.fd, self.buf) # bmi_output = struct.unpack('<7i', self.buf) bmi_output = bmi_stream(self.buf) # bmi filter # if bmi_output.spk_id > 0: # filled=True return bmi_output def BMI_core_func(self, gui_queue): ''' A daemon process dedicated on reading data from PCIE and update the shared memory with other processors: shared_arr This process func starts when self.start() it ends with self.stop() ''' while True: with Timer('real-time decoding', verbose=False): bmi_output = self.read_bmi() # timestamp, grp_id, fet0, fet1, fet2, fet3, spk_id = bmi_output # ----- real-time processing the BMI output ------ # ----- This section should cost < 100us ----- ##### real-time decoder # 1. binner # print(bmi_output.timestamp, bmi_output.grp_id) if self.binner is not None: self.binner.input(bmi_output) # print(bmi_output.output) # 2. gui queue (optional) ##### queue for visualization on GUI if self.gui_queue is not None: self.gui_queue.put(bmi_output.output) ##### file for visualization # ----- This section should cost < 100us ----- def start(self, gui_queue=False): if gui_queue: self.gui_queue = SimpleQueue() else: self.gui_queue = None self.fpga_process = Process(target=self.BMI_core_func, name='fpga', args=(self.gui_queue,)) #, args=(self.pipe_jovian_side,) self.fpga_process.daemon = True self.fpga_process.start() def stop(self): self.fpga_process.terminate() self.fpga_process.join() self.gui_queue = None
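The modes listed in the class docstring compose as follows; `prb`, `dec`, the bin size, and the file names below are placeholders rather than values from the source:

bmi = BMI(prb, fetfile='./fet.bin')        # B) real-time spike inference mode
bmi.set_binner(bin_size=0.1, B_bins=20)    # C) feed inferred spikes into a binner
bmi.set_decoder(dec, dec_result_file='./decoded_pos.bin')
bmi.start(gui_queue=True)                  # D) start the FPGA reader process with a GUI queue
packet = bmi.gui_queue.get()               # E) read one packet out for visualization
bmi.stop()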
class Jovian(EventEmitter): ''' Jovian is the abstraction of Remote Jovian software, it does following job: 0. jov = Jovian() # instance 1. jov.readline().parse() # read from mouseover 2. jov.start(); jov.stop() # start reading process in an other CPU 3. jov.set_trigger(); jov.examine_trigger(); # set and examine the trigger condition (so far only touch) based on both current input and current state 4. jov.teleport(prefix, target_pos, target_item) # execute output (so far only teleport) Jovian is a natrual event emit, it generate two `events`: 1. touch (according to input and trigger condition, it touches something) 2. teleport (based on the task fsm, something teleports) ''' def __init__(self): super(Jovian, self).__init__() self.socket_init() self.buf_init() self.shared_mem_init() self.rotenc_init() def socket_init(self): ### mouseover server connection self.input = socket.create_connection((host_ip, '22224'), timeout=1) # self.input.setblocking(False) # self.input.settimeout(0.8) self.input.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) self.output = socket.create_connection((host_ip, '22223'), timeout=1) self.output.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) self.output_control = socket.create_connection((host_ip, '22225'), timeout=1) self.enable_output() ### pynq server connection try: self.pynq = socket.create_connection((pynq_ip, '2222'), timeout=1) self.pynq.setblocking(1) self.pynq.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) self.pynq_connected = True self.socks = [self.input, self.output, self.output_control, self.pynq] except: self.pynq_connected = False self.socks = [self.input, self.output, self.output_control] def rotenc_init(self): ''' init the rotenc ''' self.rot = Rotenc() def buf_init(self): self.buf = None # the buf generator self.buffer = '' # the content self.buffering = False # the buffering state def shared_mem_init(self): # trigger task using frame counter self.cnt = torch.empty(1,) self.cnt.share_memory_() self.cnt.fill_(0) # current position of animal self.current_pos = torch.empty(3,) self.current_pos.share_memory_() # the influence radius of the animal self.touch_radius = torch.empty(1,) self.touch_radius.share_memory_() # bmi position (decoded position of the animal) self.bmi_pos = torch.empty(2,) self.bmi_pos.share_memory_() self.bmi_pos.fill_(0) # bmi head-direction (inferred head direction at bmi_pos) self.hd_window = torch.empty(1,) # time window(seconds) used to calculate head direction self.hd_window.share_memory_() self.hd_window.fill_(0) self.ball_vel = torch.empty(1,) self.ball_vel.share_memory_() self.ball_vel.fill_(0) self.bmi_hd = torch.empty(1,) # calculated hd sent to Jovian for VR rendering self.bmi_hd.share_memory_() self.bmi_hd.fill_(0) self.current_hd = torch.empty(1,) # calculated hd (same as bmi_hd) sent to Mazeview for local playground rendering self.current_hd.share_memory_() self.current_hd.fill_(0) # bmi radius (largest teleportation range) self.bmi_teleport_radius = torch.empty(1,) self.bmi_teleport_radius.share_memory_() self.bmi_teleport_radius.fill_(0) def reset(self): [conn.shutdown(2) for conn in self.socks] self.buf_init() self.socket_init() def enable_output(self, enable=True): if enable: self.output_control.send(b'1') else: self.output_control.send(b'1') def readbuffer(self): self.buffer = self.input.recv(256).decode("utf-8") self.buffering = True while self.buffering: if '\n' in self.buffer: (line, self.buffer) = self.buffer.split("\n", 1) yield Jovian_Stream(line + "\n") else: more = 
self.input.recv(256).decode("utf-8") if not more: self.buffering = False else: self.buffer += more if self.buffer: yield Jovian_Stream(self.buffer) def readline(self): if self.buf is None: self.buf = self.readbuffer() return self.buf.__next__() else: return self.buf.__next__() def _jovian_process(self): '''jovian reading process that use a multiprocessing pipe + a jovian instance as input parameters ''' while True: with Timer('', verbose=ENABLE_PROFILER): try: self._t, self._coord, self._ball_vel = self.readline().parse() _cue_name_0, _cue_name_1 = list(self.shared_cue_dict.keys()) if type(self._coord) is list: self.current_pos[:] = torch.tensor(self._coord) self.current_hd[:] = self.rot.direction self.ball_vel[:] = self._ball_vel self.log.info('{}, {}, {}, {}'.format(self._t, self.current_pos.numpy(), self.current_hd.numpy(), self._ball_vel)) self.log.info('cue_pos:, {},{}'.format(self.shared_cue_dict[_cue_name_0], self.shared_cue_dict[_cue_name_1])) self.task_routine() else: self.log.warn('{}, {}'.format(self._t, self._coord)) # except Exception as e: # self.log.warn(f'jovian recv process error:{e}') except: self.log.info('jovian recv process error') def set_bmi(self, bmi, pos_buffer_len=30): ''' This set BMI, Its binner and decoder event for JOV to act on. The event flow: bmi.binner.emit('decode', X) ==> jov customize the post decoding calculation inside the function `on_decode(X)` where the X is sent from the bmi.binner, but the `self` here is the jov set_bmi connect the event flow from decode(X) shared variable y=dec.predict_rt(X) (bmi_pos, bmi_hd) bmi =====================> jov ====================> task ''' ## Set the BMI buffer for smoothing both pos and hd self.bmi = bmi self.bmi_pos_buf = np.zeros((pos_buffer_len, 2)) hd_buffer_len = int(self.hd_window.item()/self.bmi.binner.bin_size) self.bmi_hd_buf = np.zeros((hd_buffer_len, 2)) self.bmi_hd_buf_ring = np.zeros((hd_buffer_len, )) self.log.info('Initiate the BMI decoder and playground jov connection') self.log.info('position buffer length:{}'.format(pos_buffer_len)) ## Set the real-time posterior placehodler dumb_X = np.zeros((self.bmi.binner.B, self.bmi.binner.N)) self.perm_idx = np.random.permutation(dumb_X.shape[1]) _, post_2d = self.bmi.dec.predict_rt(dumb_X, two_steps=self.bmi.two_steps, mean_firing_rate=self.bmi.mean_firing_rate) self.current_post_2d = torch.empty(post_2d.shape) self.current_post_2d.share_memory_() self.log.info('The decoder binsize:{}, the B_bins:{}'.format(self.bmi.binner.bin_size, self.bmi.binner.B)) self.log.info('The decoder input (spike count bin) shape:{}'.format(dumb_X.shape)) self.log.info('The decoder output (posterior) shape: {}'.format(self.current_post_2d.shape)) self.log.info('The bmi position update rule: {}'.format(self.bmi.bmi_update_rule)) self.speed_fifo = FIFO(depth=39) # self.bmi.dec.drop_neuron(np.array([7,9])) @self.bmi.binner.connect def on_decode(X): ''' This event is triggered every time a new bin is filled (based on BMI output timestamp) ''' # print(self.binner.nbins, self.binner.count_vec.shape, X.shape, np.sum(X)) with Timer('decoding', verbose=False): # ---------------------------------- # 1. Ring decoder for the head direction # ---------------------------------- # hd = self.bmi.dec.predict_rt(X) # hd should be a angle from [0, 360] # self.bmi_hd_buf_ring = np.hstack((self.bmi_hd_buf_ring[1:], hd)) # # print(self.bmi_hd_buf_ring) # self.bmi_hd[:] = torch.tensor(self.bmi_hd_buf_ring.mean()) # ---------------------------------- # 2. 
Bayesian decoder for the position # ---------------------------------- # if X.sum(axis=0)>2: # _X = X[:, self.perm_idx] y, post_2d = self.bmi.dec.predict_rt(X) post_2d /= post_2d.sum() max_posterior = post_2d.max() ### save scv and posterior to file ### f_scv = open('./scv.bin', 'ab+') f_scv.write(X.tobytes()) f_scv.close() f_post = open('./post_2d.bin', 'ab+') f_post.write(post_2d.tobytes()) f_post.close() ### Key: filter out criterion ### if X.sum()>2: self.current_post_2d[:] = torch.tensor(post_2d) * 1.0 # #################### just for dusty test ######################### # y += np.array([263.755, 263.755]) # y -= np.array([253.755, 253.755]) # y -= np.array([318.529, 195.760]) # y /= 4.5 # ################################################################## ball_vel_thres = self.bmi_teleport_radius.item() self.speed_fifo.input(self.ball_vel.numpy()[0]) # self.log.info('FIFO:{}'.format(self.speed_fifo.numpy())) speed = self.speed_fifo.mean()/14e-3/100 self.log.info('speed:{}, threshold:{}'.format(speed, ball_vel_thres)) self.log.info('max_post:{}, post_thres:{}'.format(max_posterior, self.bmi.posterior_threshold)) # current_speed = self.speed_fifo.mean() try: if self.bmi.bmi_update_rule == 'moving_average': # # rule1: decide the VR output by FIFO smoothing if speed < ball_vel_thres and X.sum()>2 and max_posterior>self.bmi.posterior_threshold: self.bmi_pos_buf = np.vstack((self.bmi_pos_buf[1:, :], y)) _teleport_pos = np.mean(self.bmi_pos_buf, axis=0) self.log.info('_teleport_pos:{}'.format(_teleport_pos)) else: _teleport_pos = self.bmi_pos.numpy() elif self.bmi.bmi_update_rule == 'fixed_length': # # rule2: decide the VR output by fixed length update u = (y-self.bmi_pos.numpy())/np.linalg.norm(y-self.bmi_pos.numpy()) tao = 5 if speed < ball_vel_thres and X.sum()>2 and max_posterior>self.bmi.posterior_threshold: tao = 5 # cm _teleport_pos = self.bmi_pos.numpy() + tao*u else: _teleport_pos = self.bmi_pos.numpy() elif self.bmi.bmi_update_rule == 'randomized_control': # # rule1: decide the VR output by FIFO smoothing if speed < ball_vel_thres and X.sum()>2 and max_posterior>self.bmi.posterior_threshold: last_mean_pos = np.mean(self.bmi_pos_buf, axis=0) self.bmi_pos_buf = np.vstack((self.bmi_pos_buf[1:, :], y)) mean_pos = np.mean(self.bmi_pos_buf, axis=0) diff_pos = mean_pos - last_mean_pos distance = np.linalg.norm(diff_pos) theta = np.random.uniform(low=0.0, high=2*np.pi) new_pos = self.bmi_pos.numpy() + np.array([distance*np.cos(theta), distance*np.sin(theta)]) # current position + randomly rotated distance _teleport_pos = boundray_check(new_pos) # make sure it is inside the maze self.log.info('_teleport_pos:{}'.format(_teleport_pos)) else: _teleport_pos = self.bmi_pos.numpy() # # set shared variable # _teleport_pos = rotate(_teleport_pos, theta=0) self.bmi_pos[:] = torch.tensor(_teleport_pos) # self.bmi_hd_buf = np.vstack((self.bmi_hd_buf[1:, :], _teleport_pos)) # window_size = int(self.hd_window[0]/self.bmi.binner.bin_size) # hd, speed = get_hd(trajectory=self.bmi_hd_buf[-window_size:], speed_threshold=0.6, offset_hd=0) # hd = 90 # if speed > .6: # self.bmi_hd[:] = torch.tensor(hd) # sent to Jovian # self.current_hd[:] = torch.tensor(hd) # sent to Mazeview # self.emit('bmi_update', pos=self.teleport_pos) # self.log.info('\n') self.log.info('BMI output(x,y,speed,ball_thres): {0:.2f}, {1:.2f}, {2:.2f}, {3:.2f}'.format(_teleport_pos[0], _teleport_pos[1], speed, ball_vel_thres)) except Exception as e: self.log.warn('BMI error: {}'.format(e)) pass def set_trigger(self, shared_cue_dict): 
'''shared_cue_dict is a a shared memory dict between processes contains cue name and position: shared_cue_dict := {cue_name: cue_pos, ...} ''' self.shared_cue_dict = shared_cue_dict self.log.info('-----------------------------------------------------------------------------------------') self.log.info('jovian and maze_view is connected, they starts to share cues position and transformations') self.log.info('-----------------------------------------------------------------------------------------') def task_routine(self): ''' jov emit necessary event to task by going through `task_routine` at each frame (check _jovian_process) One can flexibly define his/her own task_routine. It provides the necessary event for the task fsm at frame rate. ''' self.cnt.add_(1) if self.cnt == 1: self.emit('start') # if self.cnt%2 == 0: self.emit('frame') self.check_touch_agent_to_cue() # JUMPER, one_cue, two_cue, moving_cue etc.. self.check_touch_cue_to_cue() # JEDI def check_touch_agent_to_cue(self): for _cue_name in self.shared_cue_dict.keys(): if is_close(self.current_pos, torch.tensor(self.shared_cue_dict[_cue_name]), self.touch_radius): self.emit('touch', args=( _cue_name, self.shared_cue_dict[_cue_name] )) def check_touch_cue_to_cue(self): # here let's assume that there are only two cues to check _cue_name_0, _cue_name_1 = list(self.shared_cue_dict.keys()) if is_close(torch.tensor(self.shared_cue_dict[_cue_name_0]), torch.tensor(self.shared_cue_dict[_cue_name_1]), self.touch_radius): self.emit('touch', args=( _cue_name_0 + '->' + _cue_name_1, self.shared_cue_dict[_cue_name_0] )) def start(self): self.rot.start() self.pipe_jovian_side, self.pipe_gui_side = Pipe() self.jovian_process = Process(target=self._jovian_process, name='jovian') #, args=(self.pipe_jovian_side,) self.jovian_process.daemon = True self.reset() # !!! 
reset immediately before start solve the first time jam issue self.jovian_process.start() def stop(self): self.jovian_process.terminate() self.jovian_process.join() self.cnt.fill_(0) self.rot.stop() def get(self): return self.pipe_gui_side.recv().decode("utf-8") def toggle_motion(self): cmd = "console.toggle_motion()\n" self.output.send(cmd.encode()) def teleport(self, prefix, target_pos, head_direction=None, target_item=None): ''' Jovian abstract (output): https://github.com/chongxi/playground/issues/6 Core function: This is the only function that send `events` back to Jovian from interaction ''' try: x, y, z = target_pos # the coordination except: x, y = target_pos z = 0 if head_direction is None: v = 0 else: v = head_direction if prefix == 'console': # teleport animal, target_item is None cmd = "{}.teleport({},{},{},{})\n".format('console', x, y, 5, v) self.output.send(cmd.encode()) elif prefix == 'model': # move cue with Timer('', verbose = ENABLE_PROFILER): z += self.shared_cue_height[target_item] cmd = "{}.move('{}',{},{},{})\n".format('model', target_item, x, y, z) self.output.send(cmd.encode()) bottom = z - self.shared_cue_height[target_item] self.shared_cue_dict[target_item] = self._to_jovian_coord(np.array([x,y,bottom], dtype=np.float32)) def move_to(self, x, y, z=5, hd=0, hd_offset=0): ''' x,y = 0,0 # goes to the center (Jovian protocol) hd_offset = jov.rot.direction # the body direction ''' cmd="{}.teleport({},{},{},{})\n".format('console', x, y, z, hd+hd_offset) self.output.send(cmd.encode()) def reward(self, time): self.log.info('reward {}'.format(time)) try: cmd = 'reward, {}'.format(time) self.pynq.send(cmd.encode()) except: self.log.info('fail to send reward command - pynq connected: {}'.format(self.pynq_connected))
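Read together with the numbered points in the class docstring, a typical lifecycle looks roughly like this (a sketch only; the trigger/task wiring via `set_trigger` and `set_bmi` is omitted):

jov = Jovian()                        # 0. instance; opens sockets and shared memory
jov.start()                           # 2. start the reading process on another CPU
jov.teleport('console', (0, 0, 0))    # 4. output side: teleport the animal
jov.stop()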
class MultiprocessIterator(DataIterator): """ Wraps another ```DataIterator``` and uses it to generate tensor dicts using multiple processes. Parameters ---------- base_iterator : ``DataIterator`` The ``DataIterator`` for generating tensor dicts. It will be shared among processes, so it should not be stateful in any way. num_workers : ``int``, optional (default = 1) The number of processes used for generating tensor dicts. output_queue_size : ``int``, optional (default = 1000) The size of the output queue on which tensor dicts are placed to be consumed. You might need to increase this if you're generating tensor dicts too quickly. """ def __init__(self, base_iterator: DataIterator, num_workers: int = 1, output_queue_size: int = 1000) -> None: super().__init__() self.num_workers = num_workers self.batch_size = base_iterator._batch_size self.output_queue_size = output_queue_size # These two options make the iterator stateful, which means it can't be shared # across multiple processes. if base_iterator._cache_instances: raise ConfigurationError( "cannot use Multiprocess iterator with cache_instances") if base_iterator._instances_per_epoch: raise ConfigurationError( "cannot use instances_per_epoch with Multiprocess iterator") self.iterator = base_iterator self.processes: List[Process] = [] self.queuer: Optional[Process] = None def _create_batches(self, instances: Iterable[Instance], shuffle: bool) -> Iterable[Batch]: raise RuntimeError("MultiprocessIterator doesn't use create_batches") def index_with(self, vocab: Vocabulary): self.iterator.index_with(vocab) def _call_with_instances(self, instances: Iterable[Instance], num_epochs: int, shuffle: bool) -> Iterator[TensorDict]: # JoinableQueue needed here as sharing tensors across processes # requires that the creating process not exit prematurely. output_queue = JoinableQueue(self.output_queue_size) input_queue = Queue(self.output_queue_size * self.batch_size) # Start process that populates the queue. self.queuer = Process(target=_queuer, args=(instances, input_queue, self.num_workers, num_epochs)) self.queuer.start() # Start the tensor-dict workers. for i in range(self.num_workers): args = (input_queue, output_queue, self.iterator, shuffle, i) process = Process(target=_create_tensor_dicts_from_queue, args=args) process.start() self.processes.append(process) num_finished = 0 while num_finished < self.num_workers: item = output_queue.get() output_queue.task_done() if isinstance(item, int): num_finished += 1 logger.info( f"worker {item} finished ({num_finished} / {self.num_workers})" ) else: yield item for process in self.processes: process.join() self.processes.clear() if self.queuer is not None: self.queuer.join() self.queuer = None def _call_with_qiterable(self, qiterable: QIterable, num_epochs: int, shuffle: bool) -> Iterator[TensorDict]: # JoinableQueue needed here as sharing tensors across processes # requires that the creating tensor not exit prematurely. output_queue = JoinableQueue(self.output_queue_size) for _ in range(num_epochs): qiterable.start() # Start the tensor-dict workers. 
for i in range(self.num_workers): args = (qiterable, output_queue, self.iterator, shuffle, i) process = Process(target=_create_tensor_dicts_from_qiterable, args=args) process.start() self.processes.append(process) num_finished = 0 while num_finished < self.num_workers: item = output_queue.get() output_queue.task_done() if isinstance(item, int): num_finished += 1 logger.info( f"worker {item} finished ({num_finished} / {self.num_workers})" ) else: yield item for process in self.processes: process.join() self.processes.clear() qiterable.join() def __call__(self, instances: Iterable[Instance], num_epochs: int = None, shuffle: bool = True) -> Iterator[TensorDict]: # If you run it forever, the multiprocesses won't shut down correctly. # TODO(joelgrus) find a solution for this if num_epochs is None: raise ConfigurationError( "Multiprocess Iterator must be run for a fixed number of epochs" ) if isinstance(instances, QIterable): return self._call_with_qiterable(instances, num_epochs, shuffle) else: return self._call_with_instances(instances, num_epochs, shuffle) def __del__(self) -> None: """ Terminate processes if the user hasn't joined implicitly by consuming all the tensors. This is necessary as leaving stray processes running can corrupt shared state. In brief, we've observed shared memory counters being reused (when the memory was free from the perspective of the parent process) while the stray workers still held a reference to them. For a discussion of using destructors in Python in this manner, see https://eli.thegreenplace.net/2009/06/12/safely-using-destructors-in-python/. """ for process in self.processes: process.terminate() if self.queuer is not None: self.queuer.terminate()
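A usage sketch for the wrapper above; the base iterator, `vocab`, `instances`, and `model` are placeholders for the usual AllenNLP objects and are not part of the source:

base_iterator = BasicIterator(batch_size=32)

iterator = MultiprocessIterator(base_iterator, num_workers=4, output_queue_size=1000)
iterator.index_with(vocab)

for tensor_dict in iterator(instances, num_epochs=1, shuffle=True):
    loss = model(**tensor_dict)["loss"]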
def kp_detection(db, cfg_file, nnet, result_dir, debug=False, no_flip = False, decode_func=kp_decode): image_idx = 0 debug_dir = os.path.join(result_dir, "debug") if not os.path.exists(debug_dir): os.makedirs(debug_dir) if db.split != "trainval": db_inds = db.db_inds[:100] if debug else db.db_inds else: db_inds = db.db_inds[:100] if debug else db.db_inds[:5000] num_images = db_inds.size K = db.configs["top_k"] ae_threshold = db.configs["ae_threshold"] nms_kernel = db.configs["nms_kernel"] scales = db.configs["test_scales"] weight_exp = db.configs["weight_exp"] merge_bbox = db.configs["merge_bbox"] categories = db.configs["categories"] nms_threshold = db.configs["nms_threshold"] max_per_image = db.configs["max_per_image"] nms_algorithm = { "nms": 0, "linear_soft_nms": 1, "exp_soft_nms": 2 }[db.configs["nms_algorithm"]] im_queue = Queue() det_queue = Queue() top_bboxes_queue = Queue() im_process_task = Process(target=image_preprocess, args=(db, cfg_file, db_inds, scales, result_dir, debug, no_flip, im_queue)) post_process_task = Process(target=post_process, args=(db, debug, num_images, weight_exp, merge_bbox, categories, nms_threshold, max_per_image, nms_algorithm, det_queue, top_bboxes_queue)) im_process_task.start() post_process_task.start() start = time.time() for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): detections = [] detections_ori = [] detections_flip = [] for scale in scales: pre_data = im_queue.get(block=True) images = pre_data[0] ratios = pre_data[1] borders = pre_data[2] sizes = pre_data[3] out_width = pre_data[4] image_id = pre_data[5] dets = decode_func(nnet, images, K, no_flip, ae_threshold=ae_threshold, kernel=nms_kernel, image_idx = image_idx) image_idx += 1 if no_flip: dets = dets.reshape(1, -1, 8) _rescale_dets(dets, ratios, borders, sizes) dets[:, :, 0:4] /= scale detections.append(dets) else: dets = dets.reshape(2, -1, 8) dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] dets = dets.reshape(1, -1, 8) _rescale_dets(dets, ratios, borders, sizes) dets[:, :, 0:4] /= scale detections_ori.append(dets[:,:int(dets.shape[1]/2),:]) detections_flip.append(dets[:,int(dets.shape[1]/2):,:]) if no_flip: detections = np.concatenate(detections, axis=1) classes = detections[..., -1] classes = classes[0]#[int(detections.shape[1]/2):] detections = detections[0]#[int(detections.shape[1]/2):] # reject detections with negative scores keep_inds = (detections[:, 4] > 0) detections = detections[keep_inds] classes = classes[keep_inds] else: detections_ori_ = np.concatenate(detections_ori, axis=1) detections_flip_= np.concatenate(detections_flip, axis=1) detections = np.concatenate((detections_ori_, detections_flip_), axis=1) detections1 = detections[0][:int(detections.shape[1]/2),:] detections2 = detections[0][int(detections.shape[1]/2):,:] keep_inds1 = (detections1[:, 4] > 0) keep_inds2 = (detections2[:, 4] > 0) detections_G1 = torch.from_numpy(detections1[keep_inds1]).cuda() detections_G2 = torch.from_numpy(detections2[keep_inds2]).cuda() detections_G1[:,4] = 0 detections_G2[:,4] = 0 detections1_matrix = detections_G1.permute(1,0).unsqueeze(-1).expand(8, detections_G1.size(0), detections_G2.size(0)).contiguous() detections2_matrix = detections_G2.permute(1,0).unsqueeze(1).expand(8, detections_G1.size(0), detections_G2.size(0)).contiguous() cls_inds = (detections1_matrix[-1,...] 
== detections2_matrix[-1, ...]) select_detections1 = detections1_matrix[:4, cls_inds].permute(1,0).contiguous() select_detections2 = detections2_matrix[:4, cls_inds].permute(1,0).contiguous() overlaps = bbox_overlaps(select_detections1, select_detections2, is_aligned = True) if overlaps.size(0) > 0: detections1_conf = overlaps detections2_conf = overlaps detections1_matrix[4, cls_inds] = detections1_conf detections2_matrix[4, cls_inds] = detections2_conf detections1_conf_max = detections1_matrix[4,:,:].max(1)[0] detections2_conf_max = detections2_matrix[4,:,:].max(0)[0] conf_max = torch.cat([detections1_conf_max,detections2_conf_max], dim = 0).data.cpu().numpy() conf_max[conf_max<0.3] = 0 ################################################################################## classes = detections[..., -1] classes = classes[0]#[int(detections.shape[1]/2):] detections = detections[0]#[int(detections.shape[1]/2):] # reject detections with negative scores keep_inds = (detections[:, 4] > 0) detections = detections[keep_inds] classes = classes[keep_inds] if overlaps.size(0) > 0: detections[:,4] += detections[:,4] * conf_max keep_inds = (detections[:, 4] > 0) detections = detections[keep_inds] classes = classes[keep_inds] det_queue.put([detections, classes, image_id]) top_bboxes = top_bboxes_queue.get(block=True) elapsed = time.time() - start print('Average FPS: {}\n'.format(round(num_images/elapsed, 2))) im_process_task.terminate() post_process_task.terminate() result_json = os.path.join(result_dir, "results.json") detections = db.convert_to_coco(top_bboxes) with open(result_json, "w") as f: json.dump(detections, f) cls_ids = list(range(1, categories + 1)) image_ids = [db.image_ids(ind) for ind in db_inds] db.evaluate(result_json, cls_ids, image_ids) return 0
class Server(object):
    def __init__(
        self,
        torch_obj: TorchObj,
        master_url: str = None,
        port: int = 3000,
        acquire_lock: bool = False,
        early_stop_patience: int = 1,
        window_len: int = 4
    ):
        torch_obj, _ = load_base_torch(torch_obj)
        self.torch_obj = load_torch_model(torch_obj)

        self.model = self.torch_obj.model
        self.state_dict = self.model.state_dict()
        self.criterion = self.torch_obj.criterion
        self.optimizer = self.torch_obj.optimizer

        self.master_url = master_url
        self.port = port
        self.error_count = 0
        self.acquire_lock = acquire_lock

        self.window_len = window_len
        self.loss_window = []
        self.should_stop = False
        self.early_stop_patience = early_stop_patience

        self.server = Process(target=self.start_service)

    @staticmethod
    def determine_master(port: int):
        try:
            master_url = socket.gethostbyname(socket.gethostname()) + ':' + str(port)
            return master_url
        except:
            return 'localhost:' + str(port)

    def start_server(self):
        self.server.start()
        self.master_url = Server.determine_master(self.port)

    def stop_server(self):
        self.server.terminate()
        self.server.join()

    def start_service(self):
        app = Flask(__name__)
        self.app = app
        self.model.train()
        self.model.share_memory()
        lock = RWLock()
        lock_acquired = self.acquire_lock
        window_len = self.window_len
        early_stopper = EarlyStopping(patience=max(self.early_stop_patience, 1))

        @app.route('/')
        def home():
            return 'sparktorch'

        @app.route('/parameters', methods=['GET'])
        def get_parameters():
            if lock_acquired:
                lock.acquire_write()
            state = dill.dumps(self.model.state_dict())
            if lock_acquired:
                lock.release()
            return state

        @app.route('/losses', methods=['POST'])
        def process_loss():
            if self.should_stop:
                return {'stop': True}
            loss = request.json['loss']
            self.loss_window.append(loss)
            if len(self.loss_window) > window_len:
                loss = sum(self.loss_window) / len(self.loss_window)
                self.loss_window = []
                if early_stopper.step(loss):
                    self.should_stop = True
                    return {"stop": True}
            return {"stop": False}

        @app.route('/update', methods=['POST'])
        def update_parameters():
            if lock_acquired:
                lock.acquire_write()
            try:
                gradients = dill.loads(request.data)
                for index, param in enumerate(self.model.parameters()):
                    param.grad = gradients[index]
                self.optimizer.step()
            except Exception as e:
                self.error_count += 1
                if self.error_count > 10:
                    raise RuntimeError(f"Max Errors {str(e)}")
            finally:
                if lock_acquired:
                    lock.release()
            return 'completed'

        self.app.run(host='0.0.0.0', use_reloader=False, threaded=True, port=self.port)
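The Flask routes above define a small protocol that remote workers can consume over HTTP. A sketch of the client side (the URL, `model`, and the loss value are placeholders; the payload shapes follow the routes defined in `start_service`):

import dill
import requests

master = 'http://localhost:3000'

# Pull the current parameters from the server.
state_dict = dill.loads(requests.get(master + '/parameters').content)
model.load_state_dict(state_dict)

# ... run a local forward/backward pass, then push the gradients in parameter order ...
gradients = [p.grad for p in model.parameters()]
requests.post(master + '/update', data=dill.dumps(gradients))

# Report the loss and ask whether early stopping has triggered.
should_stop = requests.post(master + '/losses', json={'loss': 0.42}).json()['stop']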
class VideoProcessingPipeline(object): """ Manages the acquisition and preprocessing of video frames from the webcam. A pipeline with two processes is used: the first process denoises frames and queues the result to the second process which calculates the optical flows on CPU, and queues back the moving average to the main process. This moving average is used as attention prior by the model. """ def __init__(self, img_size, img_cfg, frames_window=13, flows_window=5, skip_frames=2, cam_res=(640, 480), denoising=True): """ :param img_size: the images input size of the neural network. :param img_cfg: the config parameters for image processing. :param frames_window: the number of webcam frames input at once into the neural network to make a prediction step. Best results tend to be obtained for roughly a bit less than one second. :param flows_window: the number of optical flows used to calculate an attention prior. Defaults to 5. Change at your own risks. :param skip_frames: down-sampling factor of the webcam frames. Defaults to 2 in order to roughly obtain 15 FPS with a 30 FPS webcam. This down-sampling is basic and could be improved to support ratios such as 2/3 to obtain 20 FPS. :param cam_res: webcam resolution (width, height). The application was only tested in 640x480. Change at your own risks. :param denoising: activate the denoising process. Defaults to True. Most usefull with low quality webcams. """ if frames_window not in [9, 13, 17, 21]: raise ValueError('Invalid window size for webcam frames: `%s`' % str(frames_window)) if flows_window not in [3, 5, 7, 9]: raise ValueError('Invalid window size for optical flows: `%s`' % str(flows_window)) if flows_window > frames_window: raise ValueError( 'Optical flow window cannot be wider than camera frames window' ) self.img_size = img_size # optical flows can be computed in lower resolution w/o harming results self.opt_size = img_size // 2 self.frames_window = frames_window self.flows_window = flows_window self.skip_frames = skip_frames self.total_frames = 0 # total number of frames acquired self.cam_res = cam_res self.denoising = denoising self.img_frames = [ np.zeros((self.img_size, self.img_size, 3), dtype=np.uint8) ] * (self.frames_window // 2) self.gray_frames = [ np.zeros((self.opt_size, self.opt_size), dtype=np.uint8) ] * (self.frames_window // 2) self.priors = [] # init multiprocessing self.q_parent, self.q_prior = Queue(), Queue() # start denoising process if self.denoising: self.q_denoise = Queue() self.p_denoise = Process( target=denoise_frame, args=(self.q_denoise, self.q_prior, img_cfg.getint('h'), img_cfg.getint('template_window_size'), img_cfg.getint('search_window_size'))) self.p_denoise.start() print('Denoising enabled') else: print('Denoising disabled') # start prior calculation process self.p_prior = Process(target=calc_attention_prior, args=(self.opt_size, self.flows_window, self.q_prior, self.q_parent)) self.p_prior.start() # initialise camera self.cap = cv.VideoCapture(0) if self.cap.isOpened(): self.cap_fps = int(round(self.cap.get(cv.CAP_PROP_FPS))) self.cap.set(3, self.cam_res[0]) self.cap.set(4, self.cam_res[1]) print('Device @%d FPS' % self.cap_fps) else: raise IOError('Failed to open webcam capture') # raw images self.last_frame = collections.deque(maxlen=self.cap_fps) # cropped region of the raw images self.last_cropped_frame = collections.deque(maxlen=self.cap_fps) # acquire and preprocess the exact number of frames needed # to make the first prior map for i in range((frames_window // 2) + 1): 
self.acquire_next_frame(enable_skip=False) # now wait for the first prior to be returned while len(self.priors) == 0: if not self.q_parent.empty(): # de-queue a prior prior, flow = self.q_parent.get(block=False) self.priors.append(prior) # sleep while the queue is empty time.sleep(0.01) def _center_crop(self, img, target_shape): """ Returns a center crop of the provided image. :param img: the image to crop. :param target_shape: the dimensions of the crop. :return the cropped image """ h, w = target_shape y, x = img.shape[:2] start_y = max(0, y // 2 - (h // 2)) start_x = max(0, x // 2 - (w // 2)) return img[start_y:start_y + h, start_x:start_x + w] def acquire_next_frame(self, enable_skip=True): """ Reads the next frame from the webcam and starts the asynchronous preprocessing. The video stream is down-sampled as necessary to reach the desired FPS. :param enable_skip: enables down-sampling of the webcam stream. Must be True except during initialisation. :return: the last frame acquired or None if that frame was skipped due to down-sampling of the webcam stream. """ ret, frame = self.cap.read() if not ret: self.terminate() raise IOError('Failed to read the next frame from webcam') self.total_frames += 1 if not enable_skip: return self._preprocess_frame(frame) elif (self.total_frames % self.skip_frames) == 0: return self._preprocess_frame(frame) return None def _preprocess_frame(self, frame): """ Crops, change to gray scale, resizes and sends the newly acquired webcam frame to the preprocessing pipeline. :param frame: the last acquired frame. :return the last acquired frame. """ # crop a square at the center of the frame rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB) rgb = self._center_crop(rgb, (self.cam_res[1], self.cam_res[1])) self.last_frame.append(frame) self.last_cropped_frame.append(rgb) # convert to gray scale and resize gray = cv.cvtColor(rgb, cv.COLOR_RGB2GRAY) gray = cv.resize(gray, (self.opt_size, self.opt_size)) rgb = cv.resize(rgb, (self.img_size, self.img_size)) # queue to relevant child process if self.denoising: self.q_denoise.put(gray) else: self.q_prior.put(gray) self.img_frames.append(rgb) self.gray_frames.append(gray) return frame def get_model_input(self, dequeue=True): """ Gets the list of images and the prior needed for the inference of the current frame. Use `dequeue` to retrieve the next prior from the queue. The caller must first verify that the queue is non-empty. :param dequeue: must be set to True except during initialisation. :return: images ndarray and the corresponding prior """ # de-queue a prior if dequeue: prior, flow = self.q_parent.get(block=False) self.priors.append(prior) # ensure enough frames have been preprocessed n_frames = self.frames_window assert len(self.img_frames) >= n_frames assert len(self.gray_frames) >= n_frames assert len(self.priors) == 1 imgs = np.stack(self.img_frames[:self.frames_window], axis=0) self.img_frames.pop(0) # slide window to the right self.gray_frames.pop(0) return imgs, [self.priors.pop(0)] def terminate(self): """Terminates processes, closes queues and releases video capture.""" if self.denoising: self.q_denoise.put(None) time.sleep(0.2) self.p_denoise.terminate() else: self.q_prior.put(None) time.sleep(0.2) self.p_prior.terminate() time.sleep(0.1) if self.denoising: self.p_denoise.join(timeout=0.5) self.p_prior.join(timeout=0.5) if self.denoising: self.q_denoise.close() self.q_parent.close() self.cap.release()
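A sketch of the main loop such a pipeline is designed to feed (`img_cfg` and `model` are placeholders; the call order follows the docstrings above, including the `dequeue=False` first step after initialisation):

pipeline = VideoProcessingPipeline(img_size=224, img_cfg=img_cfg)
first_step = True
try:
    while True:
        frame = pipeline.acquire_next_frame()
        if frame is None:                    # frame skipped by down-sampling
            continue
        if first_step:                       # use the prior acquired during __init__
            imgs, priors = pipeline.get_model_input(dequeue=False)
            first_step = False
        elif not pipeline.q_parent.empty():  # a freshly computed prior is ready
            imgs, priors = pipeline.get_model_input()
        else:
            continue
        prediction = model(imgs, priors)     # placeholder inference call
finally:
    pipeline.terminate()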
class Base(object): def __init__(self): self.epoch = 0 self.iteration = 0 self.offset = 0 # for multiprocessing self._epoch = 0 # Setting for multiprocessing self.preloading_process = None self.queue = Queue() self.queue_size = 0 def count_vocab_size(self, dict_path): vocab_count = 1 # for <blank> with codecs.open(dict_path, 'r', 'utf-8') as f: for line in f: if line.strip() != '': vocab_count += 1 return vocab_count def __len__(self): return len(self.df) def __getitem__(self, index): raise NotImplementedError() def __iter__(self): """Returns self.""" return self @property def epoch_detail(self): # percentage of the current epoch return self.offset / len(self) def next(self, batch_size=None): """Generate each mini-batch. Args: batch_size (int): size of mini-batch Returns: batch (tuple): is_new_epoch (bool): If true, 1 epoch is finished """ if batch_size is None: batch_size = self.batch_size if self.n_ques is None: if self.max_epoch is not None and self.epoch >= self.max_epoch: raise StopIteration # NOTE: max_epoch == None means infinite loop data_indices, is_new_epoch = self.sample_index(batch_size) batch = self.make_batch(data_indices) self.iteration += len(data_indices) else: # Clean up multiprocessing if self.preloading_process is not None and self.queue_size == 0: self.preloading_process.terminate() self.preloading_process.join() if self.max_epoch is not None and self.epoch >= self.max_epoch: # Clean up multiprocessing self.preloading_process.terminate() self.preloading_process.join() raise StopIteration # NOTE: max_epoch == None means infinite loop # Enqueue mini-batches if self.queue_size == 0: self.df_indices_list = [] self.is_new_epoch_list = [] for _ in range(self.n_ques): data_indices, is_new_epoch = self.sample_index(batch_size) self.df_indices_list.append(data_indices) self.is_new_epoch_list.append(is_new_epoch) self.preloading_process = Process(self.preloading_loop, args=(self.queue, self.df_indices_list)) self.preloading_process.start() self.queue_size += self.n_ques time.sleep(3) self.iteration += len(self.df_indices_list[self.n_ques - self.queue_size]) self.queue_size -= 1 batch = self.queue.get() is_new_epoch = self.is_new_epoch_list.pop(0) if is_new_epoch: self.epoch += 1 return batch, is_new_epoch def sample_index(self, batch_size): """Sample data indices of mini-batch. 
Args: batch_size (int): the size of mini-batch Returns: data_indices (np.ndarray): is_new_epoch (bool): """ is_new_epoch = False if self.discourse_aware: n_utt = min(self.n_utt_session_dict_epoch.keys()) assert self.utt_offset < n_utt data_indices = [ self.df[self.session_offset_dict[session_id] + self.utt_offset:self.session_offset_dict[session_id] + self.utt_offset + 1].index[0] for session_id in self.n_utt_session_dict_epoch[n_utt][:batch_size] ] self.utt_offset += 1 if self.utt_offset == n_utt: if len(self.n_utt_session_dict_epoch[n_utt][batch_size:]) > 0: self.n_utt_session_dict_epoch[ n_utt] = self.n_utt_session_dict_epoch[n_utt][ batch_size:] else: self.n_utt_session_dict_epoch.pop(n_utt) self.utt_offset = 0 # reset for the new epoch if len(self.n_utt_session_dict_epoch.keys()) == 0: self.n_utt_session_dict_epoch = copy.deepcopy( self.n_utt_session_dict) is_new_epoch = True self._epoch += 1 elif self.sort_by_input_length or not self.shuffle: if self.sort_by_input_length: # Change batch size dynamically min_xlen = self.df[self.offset:self.offset + 1]['xlen'].values[0] min_ylen = self.df[self.offset:self.offset + 1]['ylen'].values[0] batch_size_tmp = self.select_batch_size( batch_size, min_xlen, min_ylen) else: batch_size_tmp = batch_size if len(self.rest) > batch_size_tmp: data_indices = list(self.df[self.offset:self.offset + batch_size_tmp].index) self.rest -= set(data_indices) # NOTE: rest is in uttrance length order when sort_by_input_length == True # NOTE: otherwise in name length order when shuffle == False self.offset += len(data_indices) else: # Last mini-batch data_indices = list(self.df[self.offset:self.offset + len(self.rest)].index) self._reset() is_new_epoch = True self._epoch += 1 if self._epoch == self.sort_stop_epoch: self.sort_by_input_length = False self.shuffle = True else: # Randomly sample uttrances if len(self.rest) > batch_size: data_indices = random.sample(list(self.rest), batch_size) self.rest -= set(data_indices) else: # Last mini-batch data_indices = list(self.rest) self._reset() is_new_epoch = True self._epoch += 1 self.offset += len(data_indices) return data_indices, is_new_epoch def select_batch_size(self, batch_size, min_xlen, min_ylen): if not self.dynamic_batching: return batch_size if min_xlen <= 800: pass elif min_xlen <= 1600 or 70 < min_ylen <= 100: batch_size = int(batch_size / 2) else: batch_size = int(batch_size / 4) if batch_size < 1: batch_size = 1 return batch_size def reset(self): self._reset() self.queue = Queue() self.queue_size = 0 # Clean up multiprocessing if self.preloading_process is not None: self.preloading_process.terminate() self.preloading_process.join() def _reset(self): """Reset data counter and offset.""" self.rest = set(list(self.df.index)) self.offset = 0 def preloading_loop(self, queue, df_indices_list): """. Args: queue (): df_indices_list (np.ndarray): """ for i in range(len(df_indices_list)): queue.put(self.make_batch(df_indices_list[i]))
class Base(object):
    def __init__(self, *args, **kwargs):
        self.epoch = 0
        self.iteration = 0
        self.offset = 0

        # for multiprocessing
        self._epoch = 0

        # Setting for multiprocessing
        self.preloading_process = None
        self.queue = Queue()
        self.queue_size = 0

        # Read the vocabulary file
        vocab_count = 0
        with codecs.open(kwargs['vocab_file_path'], 'r', 'utf-8') as f:
            for line in f:
                if line.strip() != '':
                    vocab_count += 1
        self.num_classes = vocab_count

        if 'vocab_file_path_sub' in kwargs.keys():
            vocab_count_sub = 0
            with codecs.open(kwargs['vocab_file_path_sub'], 'r', 'utf-8') as f:
                for line in f:
                    vocab_count_sub += 1
            self.num_classes_sub = vocab_count_sub

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        feature = self.load(self.df['input_path'][index])
        transcript = self.df['transcript'][index]
        return (feature, transcript)

    def __iter__(self):
        """Returns self."""
        return self

    @property
    def pad_value(self):
        return -1 if not self.is_test else None

    @property
    def epoch_detail(self):
        # Floating point version of epoch
        return self.epoch + self.offset / len(self)

    @property
    def current_batch_size(self):
        return self._current_batch_size

    def __next__(self, batch_size=None):
        """Generate each mini-batch.

        Args:
            batch_size (int, optional): the size of mini-batch
        Returns:
            batch (tuple):
            is_new_epoch (bool): If true, 1 epoch is finished

        """
        if batch_size is None:
            batch_size = self.batch_size

        if self.num_enque is None:
            if self.max_epoch is not None and self.epoch >= self.max_epoch:
                raise StopIteration
                # NOTE: max_epoch == None means infinite loop

            data_indices, is_new_epoch = self.sample_index(batch_size)
            self._current_batch_size = len(data_indices)
            batch = self.make_batch(data_indices)
            self.iteration += len(data_indices)
        else:
            # Clean up multiprocessing
            if self.preloading_process is not None and self.queue_size == 0:
                self.preloading_process.terminate()
                self.preloading_process.join()

            if self.max_epoch is not None and self.epoch >= self.max_epoch:
                # Clean up multiprocessing
                self.preloading_process.terminate()
                self.preloading_process.join()
                raise StopIteration
                # NOTE: max_epoch == None means infinite loop

            # Enqueue mini-batches
            if self.queue_size == 0:
                self.data_indices_list = []
                self.is_new_epoch_list = []
                for _ in range(self.num_enque):
                    data_indices, is_new_epoch = self.sample_index(batch_size)
                    self._current_batch_size = len(data_indices)
                    self.data_indices_list.append(data_indices)
                    self.is_new_epoch_list.append(is_new_epoch)
                self.preloading_process = Process(
                    target=self.preloading_loop,
                    args=(self.queue, self.data_indices_list))
                self.preloading_process.start()
                self.queue_size += self.num_enque
                time.sleep(3)

            # print(self.queue.qsize())
            # print(self.queue_size)

            self.iteration += len(
                self.data_indices_list[self.num_enque - self.queue_size])
            self.queue_size -= 1
            batch = self.queue.get()
            is_new_epoch = self.is_new_epoch_list.pop(0)

        if is_new_epoch:
            self.epoch += 1

        return batch, is_new_epoch

    def next(self, batch_size=None):
        # For python2
        return self.__next__(batch_size)

    def sample_index(self, batch_size):
        """Sample data indices of mini-batch.

        Args:
            batch_size (int): the size of mini-batch
        Returns:
            data_indices (np.ndarray):
            is_new_epoch (bool):

        """
        is_new_epoch = False

        if self.sort_utt or not self.shuffle:
            if self.sort_utt:
                # Change batch size dynamically
                min_frame_num_batch = self.df[self.offset:self.offset + 1]['frame_num'].values[0]
                batch_size_tmp = self.select_batch_size(batch_size, min_frame_num_batch)
                # NOTE: this depends on each corpus
            else:
                batch_size_tmp = batch_size

            if len(self.rest) > batch_size_tmp:
                df_tmp = self.df[self.offset:self.offset + batch_size_tmp]
                data_indices = list(df_tmp.index)
                self.rest -= set(data_indices)
                # NOTE: rest is in utterance length order when sort_utt == True
                # NOTE: otherwise in name length order when shuffle == False
                self.offset += len(data_indices)
            else:
                # Last mini-batch
                data_indices = list(self.rest)
                self._reset()
                is_new_epoch = True
                self._epoch += 1
                if self._epoch == self.sort_stop_epoch:
                    self.sort_utt = False
                    self.shuffle = True

            # Shuffle data in the mini-batch
            # random.shuffle(data_indices)

            # Sort in the descending order for pytorch
            data_indices = data_indices[::-1]
        else:
            # Randomly sample utterances
            if len(self.rest) > batch_size:
                data_indices = random.sample(list(self.rest), batch_size)
                self.rest -= set(data_indices)
            else:
                # Last mini-batch
                data_indices = list(self.rest)
                self._reset()
                is_new_epoch = True
                self._epoch += 1

            # Shuffle selected mini-batch
            random.shuffle(data_indices)

        return data_indices, is_new_epoch

    def select_batch_size(self, batch_size, min_frame_num_batch):
        raise NotImplementedError

    def reset(self):
        self._reset()

        self.queue = Queue()
        self.queue_size = 0

        # Clean up multiprocessing
        if self.preloading_process is not None:
            self.preloading_process.terminate()
            self.preloading_process.join()

    def _reset(self):
        """Reset data counter and offset."""
        self.rest = set(list(self.df.index))
        self.offset = 0

    def load(self, path):
        ext = os.path.basename(path).split('.')[-1]
        if ext == 'npy':
            return self._load_npy(path)
        elif ext == 'htk':
            return self._load_htk(path)

    def _load_npy(self, path):
        """Load npy files.

        Args:
            path (string):
        Returns:
            input_data (np.ndarray): A tensor of size (frame_num, feature_dim)

        """
        return np.load(path)

    def _load_htk(self, htk_path):
        """Load each HTK file.

        Args:
            htk_path (string): path to a HTK file
        Returns:
            input_data (np.ndarray): A tensor of size (frame_num, feature_dim)

        """
        with open(htk_path, "rb") as f:
            # Read header
            spam = f.read(12)
            frame_num, sampPeriod, sampSize, parmKind = unpack(">IIHH", spam)

            # Read data
            feature_dim = int(sampSize / 4)
            f.seek(12, 0)
            input_data = np.fromfile(f, 'f')
            input_data = input_data.reshape(-1, feature_dim)
            input_data.byteswap(True)

        return input_data

    def split_per_device(self, x, num_gpus):
        if num_gpus > 1:
            return np.array_split(x, num_gpus, axis=0)
        else:
            return x[np.newaxis]

    def preloading_loop(self, queue, data_indices_list):
        """Pre-load mini-batches in a background process.

        Args:
            queue (multiprocessing.Queue):
            data_indices_list (list): list of np.ndarray

        """
        # print("Pre-loading started.")
        for i in range(len(data_indices_list)):
            queue.put(self.make_batch(data_indices_list[i]))
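# --- Illustrative sketch (not part of the original class) ---
# _load_htk() above hand-parses the 12-byte big-endian HTK header
# (frame count, sample period, bytes per frame, parameter kind) followed by
# float32 frames. A small round-trip makes the byte layout concrete; the helper
# names, the temporary path and the parameter-kind value are assumptions.
import numpy as np
from struct import pack, unpack


def write_dummy_htk(path, frame_num=3, feature_dim=4):
    """Write a minimal big-endian HTK file: 12-byte header + float32 frames."""
    samp_period = 100000         # 10 ms expressed in 100 ns units
    samp_size = feature_dim * 4  # bytes per frame (float32)
    parm_kind = 9                # 'USER' kind; any value works for this test
    data = np.arange(frame_num * feature_dim, dtype='>f4')
    with open(path, 'wb') as f:
        f.write(pack(">IIHH", frame_num, samp_period, samp_size, parm_kind))
        f.write(data.tobytes())


def read_htk_sketch(path):
    """Same parsing steps as Base._load_htk above."""
    with open(path, 'rb') as f:
        frame_num, samp_period, samp_size, parm_kind = unpack(">IIHH", f.read(12))
        feature_dim = samp_size // 4
        feat = np.fromfile(f, 'f').reshape(-1, feature_dim)
        feat.byteswap(True)      # HTK stores samples big-endian
    return feat


write_dummy_htk('/tmp/dummy.htk')
print(read_htk_sketch('/tmp/dummy.htk').shape)  # (3, 4)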
class DataLoader(object):
    def __init__(self, minibatchlist, images_path, n_workers=1, multi_view=False, use_triplets=False,
                 infinite_loop=True, max_queue_len=4, is_training=False, apply_occlusion=False,
                 occlusion_percentage=0.5):
        """
        A custom dataloader to work with our datasets, and to prepare data
        for the different models (inverse, priors, autoencoder, ...)

        :param minibatchlist: ([np.array]) list of observations indices (grouped per minibatch)
        :param images_path: (np.array) Array of paths to images
        :param n_workers: (int) number of preprocessing workers (load and preprocess each image)
        :param multi_view: (bool)
        :param use_triplets: (bool)
        :param infinite_loop: (bool) whether to have an iterator that can be reset;
            set to False, it stops after one pass over the data
        :param max_queue_len: (int) Max number of minibatches that can be preprocessed at the same time
        :param apply_occlusion: (bool) whether occlusion is enabled (when training a DAE)
        :param occlusion_percentage: (float) max percentage of occlusion when using a DAE
        :param is_training: (bool) Set to True, the dataloader will output both `obs` and `next_obs`
            (a tuple of th.Tensor); set to False, it will only output one th.Tensor.
        """
        super(DataLoader, self).__init__()
        self.n_workers = n_workers
        self.infinite_loop = infinite_loop
        self.n_minibatches = len(minibatchlist)
        self.minibatchlist = minibatchlist
        self.images_path = images_path
        self.shuffle = is_training
        self.queue = Queue(max_queue_len)
        self.process = None
        self.use_triplets = use_triplets
        self.multi_view = multi_view
        # apply occlusion for training a DAE
        self.apply_occlusion = apply_occlusion
        self.occlusion_percentage = occlusion_percentage
        self.startProcess()

    @staticmethod
    def createTestMinibatchList(n_samples, batch_size):
        """
        Create list of minibatches for plotting
        :param n_samples: (int)
        :param batch_size: (int)
        :return: ([np.array])
        """
        minibatchlist = []
        for i in range(n_samples // batch_size + 1):
            start_idx = i * batch_size
            end_idx = min(n_samples, (i + 1) * batch_size)
            minibatchlist.append(np.arange(start_idx, end_idx))
        return minibatchlist

    def startProcess(self):
        """Start preprocessing process"""
        self.process = Process(target=self._run)
        # Make it a daemon, so it will be deleted at the same time
        # as the main process
        self.process.daemon = True
        self.process.start()

    def _run(self):
        start = True
        with Parallel(n_jobs=self.n_workers, batch_size="auto", backend="threading") as parallel:
            while start or self.infinite_loop:
                start = False

                if self.shuffle:
                    indices = np.random.permutation(self.n_minibatches).astype(np.int64)
                else:
                    indices = np.arange(len(self.minibatchlist), dtype=np.int64)

                for minibatch_idx in indices:
                    batch_noisy, batch_obs_noisy, batch_next_obs_noisy = None, None, None
                    if self.shuffle:
                        images = np.stack((self.images_path[self.minibatchlist[minibatch_idx]],
                                           self.images_path[self.minibatchlist[minibatch_idx] + 1]))
                        images = images.flatten()
                    else:
                        images = self.images_path[self.minibatchlist[minibatch_idx]]

                    if self.n_workers <= 1:
                        batch = [self._makeBatchElement(image_path, self.multi_view, self.use_triplets)
                                 for image_path in images]
                        if self.apply_occlusion:
                            batch_noisy = [self._makeBatchElement(image_path, self.multi_view, self.use_triplets,
                                                                  apply_occlusion=self.apply_occlusion,
                                                                  occlusion_percentage=self.occlusion_percentage)
                                           for image_path in images]
                    else:
                        batch = parallel(delayed(self._makeBatchElement)(image_path, self.multi_view, self.use_triplets)
                                         for image_path in images)
                        if self.apply_occlusion:
                            batch_noisy = parallel(delayed(self._makeBatchElement)(image_path, self.multi_view,
                                                                                   self.use_triplets,
                                                                                   apply_occlusion=self.apply_occlusion,
                                                                                   occlusion_percentage=self.occlusion_percentage)
                                                   for image_path in images)

                    batch = th.cat(batch, dim=0)
                    if self.apply_occlusion:
                        batch_noisy = th.cat(batch_noisy, dim=0)

                    if self.shuffle:
                        batch_obs, batch_next_obs = batch[:len(images) // 2], batch[len(images) // 2:]
                        if batch_noisy is not None:
                            batch_obs_noisy, batch_next_obs_noisy = batch_noisy[:len(images) // 2], \
                                batch_noisy[len(images) // 2:]
                        self.queue.put((minibatch_idx, batch_obs, batch_next_obs,
                                        batch_obs_noisy, batch_next_obs_noisy))
                    else:
                        self.queue.put(batch)

                    # Free memory
                    if self.shuffle:
                        del batch_obs
                        del batch_next_obs
                        if batch_noisy is not None:
                            del batch_obs_noisy
                            del batch_next_obs_noisy
                    del batch
                    del batch_noisy

            self.queue.put(None)

    @classmethod
    def _makeBatchElement(cls, image_path, multi_view=False, use_triplets=False, apply_occlusion=False,
                          occlusion_percentage=None):
        """
        :param image_path: (str) path to an image (without the 'data/' prefix)
        :param multi_view: (bool)
        :param use_triplets: (bool)
        :return: (th.Tensor)
        """
        # Remove trailing .jpg if present
        image_path = 'data/' + image_path.split('.jpg')[0]

        if multi_view:
            images = []
            # Load different views of the same timestep
            for i in range(2):
                im = cv2.imread("{}_{}.jpg".format(image_path, i + 1))
                if im is None:
                    raise ValueError("tried to load {}_{}.jpg, but it was not found".format(image_path, i + 1))
                images.append(preprocessImage(im, apply_occlusion=apply_occlusion,
                                              occlusion_percentage=occlusion_percentage))

            ####################
            # loading a negative observation
            if use_triplets:
                # End of file format for positive & negative observations (camera 1) - length : 6 characters
                extra_chars = '_1.jpg'

                # getting path for all files of same record episode, e.g path_to_data/record_001/frame[0-9]{6}*
                digits_path = glob.glob(image_path[:-6] + '[0-9]*' + extra_chars)

                # getting the current & all frames' timesteps
                current = int(image_path[-6:])
                # For all others extract last 6 digits (timestep) after removing the extra chars
                all_frame_steps = [int(k[:-len(extra_chars)][-6:]) for k in digits_path]
                # removing current positive timestep from the list
                all_frame_steps.remove(current)

                # negative timestep by random sampling
                length_set_steps = len(all_frame_steps)
                negative = all_frame_steps[random.randint(0, length_set_steps - 1)]
                negative_path = '{}{:06d}'.format(image_path[:-6], negative)

                im3 = cv2.imread(negative_path + "_1.jpg")
                if im3 is None:
                    raise ValueError("tried to load {}_{}.jpg, but it was not found".format(negative_path, 1))
                im3 = preprocessImage(im3)
                # stacking along channels
                images.append(im3)
            im = np.dstack(images)
        else:
            im = cv2.imread("{}.jpg".format(image_path))
            if im is None:
                raise ValueError("tried to load {}.jpg, but it was not found".format(image_path))
            im = preprocessImage(im, apply_occlusion=apply_occlusion,
                                 occlusion_percentage=occlusion_percentage)

        # Channel first (for pytorch convolutions) + one dim for the batch
        # th.tensor creates a copy
        im = th.tensor(im.reshape((1, ) + im.shape).transpose(0, 3, 2, 1))
        return im

    def __len__(self):
        return self.n_minibatches

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            try:
                val = self.queue.get_nowait()
                break
            except queue.Empty:
                time.sleep(0.001)
                continue
        if val is None:
            raise StopIteration
        return val

    next = __next__  # Python 2 compatibility

    def __del__(self):
        if self.process is not None:
            self.process.terminate()
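# --- Illustrative usage (not part of the original class) ---
# How this loader might be driven for plotting/inference. Only the
# createTestMinibatchList() call runs without data on disk; the image paths
# below are assumptions and must point at real files under data/.
import numpy as np

print([m.tolist() for m in DataLoader.createTestMinibatchList(n_samples=10, batch_size=4)])
# -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]

images_path = np.array(['record_001/frame{:06d}'.format(i) for i in range(100)])
minibatchlist = DataLoader.createTestMinibatchList(len(images_path), batch_size=16)
loader = DataLoader(minibatchlist, images_path, n_workers=4,
                    infinite_loop=False, is_training=False)
for batch in loader:   # each item is one preprocessed th.Tensor minibatch
    print(batch.shape)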
class DataLoader(object):
    def __init__(self, formula_path, dict_path, separate_conj_stmt=False, binary=False, part_no=-1,
                 part_total=0, file_list=None, deepmath=False, norename=False, filter_abelian=False,
                 compatible=False):
        # part_no, part_total: will not shuffle.
        self.formula_path = formula_path
        self.dict_path = dict_path
        self.maxsize = 500  # maxsize for async queue
        self.iter_ = 0  # epoch. Legacy reason for its name
        self.total_in_epoch = -1  # conj, stmt pairs supplied in the current epoch.
        self.total_iter = -1  # total iteration
        self.rename = not norename
        if not os.path.exists(dict_path):
            self.dict = self.build_dictionary()
        else:
            self.dict = torch.load(dict_path)
        self.queue = Queue(self.maxsize)
        self.reader = Process(target=self.read)
        self.dict_size = len(self.dict.keys())
        self.separate_conj_stmt = separate_conj_stmt
        self.binary = binary
        self.part_no = part_no
        self.part_total = part_total
        if file_list is None:
            file_list = os.listdir(self.formula_path)
            if part_total != 0:
                file_list.sort()
                file_list = split_list(file_list, part_total, part_no)
        else:
            if part_total != 0:
                file_list = split_list(file_list, part_total, part_no)
        self.file_list = file_list
        self.deepmath = deepmath
        self.filter_abelian = filter_abelian
        self.compatible = compatible

    def start_reader(self):
        self.reader.daemon = True
        self.reader.start()

    def next_batch(self):
        # [conjecture, statement, label, conj_binary, stmt_binary]
        data = self.queue.get()
        if data is None:
            self.iter_ += 1
            self.total_in_epoch = 0
        else:
            self.total_in_epoch += 1
            self.total_iter += 1
        return data

    def build_dictionary(self):
        def _deter_name(node):
            node_name = node.name
            if node.type == NodeType.VAR:
                node_name = 'VAR'
            elif node.type == NodeType.VARFUNC:
                node_name = 'VARFUNC'
            return node_name

        files = os.listdir(self.formula_path)
        tokens = set({})
        dicts = {}
        for i, a_file in enumerate(files):
            with open(os.path.join(self.formula_path, a_file), 'rb') as f:
                print('Loading file {}/{}'.format(i + 1, len(files)))
                dataset = pickle.load(f)
                for j, pair in enumerate(dataset):
                    print('Processing pair {}/{}'.format(j + 1, len(dataset)))
                    if self.rename:
                        tokens.update([_deter_name(x) for x in pair[1]])
                        tokens.update([_deter_name(x) for x in pair[2]])
                    else:
                        tokens.update([x.name for x in pair[1]])
                        tokens.update([x.name for x in pair[2]])
        for i, x in enumerate(tokens):
            dicts[x] = i
        dicts['UNKNOWN'] = len(dicts)
        if 'VAR' not in dicts:
            dicts['VAR'] = len(dicts)
        if 'VARFUNC' not in dicts:
            dicts['VARFUNC'] = len(dicts)
        torch.save(dicts, self.dict_path)
        return dicts

    def _decide_name(self, node):
        node_name = node.name
        if self.rename:
            if node.type == NodeType.VAR:
                node_name = 'VAR'
            elif node.type == NodeType.VARFUNC:
                node_name = 'VARFUNC'
        if node_name not in self.dict:
            node_name = 'UNKNOWN'
        return node_name

    def generate_one_sentence(self, sentence):
        # Undirected graph
        # index1 starts, index2 ends
        index1 = []
        index2 = []
        onehot_collect = []
        id2pos = {node.id: i for i, node in enumerate(sentence)}
        for i, node in enumerate(sentence):
            for x in node.incoming:
                index1.append(id2pos[x.id])
                index2.append(id2pos[node.id])
            for x in node.outgoing:
                index1.append(id2pos[x.id])
                index2.append(id2pos[node.id])
            node_name = self._decide_name(node)
            onehot_collect.append(self.dict[node_name])

        index1 = np.array(index1)
        index2 = np.array(index2)
        mat = np.zeros((len(sentence), len(index2)), dtype=np.float32)
        for x in sentence:
            mat[id2pos[x.id], index2 == id2pos[x.id]] = 1.0 / np.sum(index2 == id2pos[x.id])

        if self.compatible:
            onehot = np.zeros((len(sentence), self.dict_size), dtype=np.float32)
            onehot[range(len(sentence)), onehot_collect] = 1

        index1 = torch.from_numpy(index1)
        index2 = torch.from_numpy(index2)
        if self.compatible:
            onehot = torch.from_numpy(onehot)
        else:
            onehot = torch.LongTensor(onehot_collect)
        mat = torch.from_numpy(mat)

        return (onehot, index1, index2, mat)

    def directed_generate_one_sentence(self, sentence):
        # Distinguish in-edges and out-edges
        # index1 starts, index2 ends
        iindex1 = []
        iindex2 = []
        oindex1 = []
        oindex2 = []
        id2pos = {node.id: i for i, node in enumerate(sentence)}
        onehot_collect = []
        for node in sentence:
            for x in node.incoming:
                iindex1.append(id2pos[x.id])
                iindex2.append(id2pos[node.id])
            for x in node.outgoing:
                oindex1.append(id2pos[node.id])
                oindex2.append(id2pos[x.id])
            node_name = self._decide_name(node)
            onehot_collect.append(self.dict[node_name])

        # Incoming
        iindex1 = np.array(iindex1)
        iindex2 = np.array(iindex2)
        oindex1 = np.array(oindex1)
        oindex2 = np.array(oindex2)
        imat = np.zeros((len(sentence), len(iindex2)), dtype=np.float32)
        omat = np.zeros((len(sentence), len(oindex1)), dtype=np.float32)
        for x in sentence:
            imat[id2pos[x.id], iindex2 == id2pos[x.id]] = 1.0 / (
                np.sum(oindex1 == id2pos[x.id]) + np.sum(iindex2 == id2pos[x.id]))
        # Outgoing
        for x in sentence:
            omat[id2pos[x.id], oindex1 == id2pos[x.id]] = 1.0 / (
                np.sum(oindex1 == id2pos[x.id]) + np.sum(iindex2 == id2pos[x.id]))

        if self.compatible:
            onehot = np.zeros((len(sentence), self.dict_size), dtype=np.float32)
            onehot[range(len(sentence)), onehot_collect] = 1

        iindex1 = torch.from_numpy(iindex1)
        iindex2 = torch.from_numpy(iindex2)
        oindex1 = torch.from_numpy(oindex1)
        oindex2 = torch.from_numpy(oindex2)
        if self.compatible:
            onehot = torch.from_numpy(onehot)
        else:
            onehot = torch.LongTensor(onehot_collect)
        imat = torch.from_numpy(imat)
        omat = torch.from_numpy(omat)

        return (onehot, iindex1, iindex2, imat, oindex1, oindex2, omat)

    def generate_one_sentence_binary(self, sentence):
        # directed graph
        index = []
        id2pos = {node.id: i for i, node in enumerate(sentence)}
        for node in sentence:
            if len(node.outgoing) > 1 and not (self.filter_abelian and node.name in COMM_OP):
                for i, n1 in enumerate(node.outgoing):
                    for n2 in node.outgoing[i + 1:]:
                        index.append(id2pos[node.id])
                        index.append(id2pos[n1.id])
                        index.append(id2pos[n2.id])
            if len(node.outgoing) > 1 and (self.filter_abelian and node.name == '|-:c'):
                for n1 in node.outgoing[1:]:
                    index.append(id2pos[node.id])
                    index.append(id2pos[node.outgoing[0].id])
                    index.append(id2pos[n1.id])

        index = np.array(index)
        mat = np.zeros((len(sentence), len(index)), dtype=np.float32)
        for x in sentence:
            f = index == id2pos[x.id]
            if np.sum(f) > 0:
                mat[id2pos[x.id], f] = 1.0 / np.sum(f)
        # print(index.shape, mat.shape)
        if index.shape[0] > 0:
            return (torch.from_numpy(index.reshape(-1, 3).T), torch.from_numpy(mat))
        else:
            # print(index.shape, mat.shape)
            return (torch.Tensor(1), torch.Tensor(1))

    def read(self):
        files = self.file_list
        while True:
            random.shuffle(files)
            for a_file in files:
                with open(os.path.join(self.formula_path, a_file), 'rb') as f:
                    content = pickle.load(f)
                    random.shuffle(content)
                    for x in content:
                        flag, conj, stmt = x
                        if self.separate_conj_stmt:
                            self.queue.put((self.directed_generate_one_sentence(conj),
                                            self.directed_generate_one_sentence(stmt), flag))
                        elif self.binary:
                            self.queue.put((self.directed_generate_one_sentence(conj),
                                            self.directed_generate_one_sentence(stmt), flag,
                                            self.generate_one_sentence_binary(conj),
                                            self.generate_one_sentence_binary(stmt)))
                        else:
                            self.queue.put((self.generate_one_sentence(conj),
                                            self.generate_one_sentence(stmt), flag))
            self.queue.put(None)

    def destruct(self):
        self.reader.terminate()
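# --- Illustrative usage (not part of the original class) ---
# Driving the graph loader for one pass over the data; the paths are placeholders
# and the loop simply follows next_batch()'s convention of returning None once
# the background reader finishes an epoch.
loader = DataLoader(formula_path='data/holstep_train',   # directory of pickled (flag, conj, stmt) lists
                    dict_path='data/token_dict.pt',      # built and saved on first use if missing
                    separate_conj_stmt=True)
loader.start_reader()        # daemon reader keeps the queue (maxsize 500) filled

while True:
    data = loader.next_batch()
    if data is None:         # end of one pass over the shuffled files
        break
    conj_graph, stmt_graph, label = data
    # each graph is (onehot, iindex1, iindex2, imat, oindex1, oindex2, omat)
loader.destruct()            # terminate the reader process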
def train_distributed(
    rdd: RDD,
    torch_obj: str,
    iters: int = 10,
    partition_shuffles: int = 1,
    verbose: int = 1,
    mini_batch: int = -1,
    validation_pct: float = 0.0,
    world_size: int = 2,
    device: str = 'cpu',
    early_stop_patience: int = -1
) -> Dict:
    """
    Entry point to train the model in a distributed fashion.

    :param rdd: The rdd of data to run on the model.
    :param torch_obj: The torch object as a string that includes the model and param shapes.
    :param iters: Number of iterations for training.
    :param partition_shuffles: Number of partition shuffles (Need to implement)
    :param verbose: Verbosity of logs
    :param mini_batch: Mini batch for each iteration of training.
    :param validation_pct: How many items to validate
    :param world_size: number of partitions.
    :param device: pytorch device
    :param early_stop_patience: patience for early stopping (-1 disables it)

    :return: The train dict.
    """
    master_url = retrieve_url()  # main URL for the driver

    torch_loaded, params = load_base_torch(torch_obj)

    # Start the driver process.
    p = Process(
        target=handle_model,
        args=(-1, None, params, master_url, iters, world_size, early_stop_patience)
    )
    p.start()

    try:
        state_dict = None
        for i in range(partition_shuffles):
            # Run the model with barrier execution mode.
            state_dict = mapPartitionsWithIndex(
                rdd,
                lambda i, x: handle_model(
                    i, x,
                    torch_obj=torch_loaded,
                    master_url=master_url,
                    iters=iters,
                    verbose=verbose,
                    mini_batch=mini_batch,
                    validation_pct=validation_pct,
                    world_size=world_size,
                    device=device,
                    early_stop_patience=int(early_stop_patience)
                )
            ).collect()

            if partition_shuffles - i > 1:
                num_partitions = rdd.getNumPartitions()
                rdd = rdd.repartition(num_partitions)

        return state_dict[0]

    finally:
        p.terminate()
        p.join()
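# --- Illustrative usage (not part of the original function) ---
# How train_distributed() might be invoked from a Spark job. The RDD contents,
# the serialized torch_obj string and the surrounding model object are
# assumptions; only the keyword arguments mirror the signature above.
state_dict = train_distributed(
    rdd=data_rdd,                              # assumed: rows of feature/label vectors
    torch_obj=torch_obj,                       # assumed: string produced by the project's serializer
    iters=50,
    partition_shuffles=1,
    mini_batch=64,
    validation_pct=0.1,
    world_size=data_rdd.getNumPartitions(),    # one distributed worker per partition
    device='cpu',
    early_stop_patience=5,
)
model.load_state_dict(state_dict)              # assumed: the dict matches the local model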