Example #1
0
    def process_batch(self, file_path, charmap, noise_types, frame_overlap_flag,
                      n_threads=2):
        """Load one batch of training data in parallel and collate the results.

        The file indices [self.b_id, self.b_id + self.batch) are split across
        ``n_threads`` worker threads; each thread calls
        ``self.get_train_targ_data`` on its own contiguous slice and the
        per-thread results are concatenated into batch-level arrays.

        Args:
            file_path: Path passed through to ``get_train_targ_data``.
            charmap: Character map passed through to ``get_train_targ_data``.
            noise_types: Noise specification passed through to the loader.
            frame_overlap_flag: Frame-overlap flag passed through to the loader.
            n_threads: Number of loader threads (default 2, the original
                hard-coded value; keyword-only in practice for old callers).

        Returns:
            Tuple ``(frames, transcripts, t_indices, t_values, t_shape,
            seq_lens)`` — stacked feature frames, flattened transcript list,
            sparse-target indices/values, dense target shape
            ``[batch, max_target_len]``, and per-utterance frame counts.
        """
        # Wrap around when fewer than `batch` files remain.
        # NOTE(review): `>=` also resets when exactly `batch` files are left,
        # which skips a final full batch -- confirm this is intentional.
        if (self.b_id + self.batch) >= self.n_files:
            self.b_id = 0

        # Files handled by each of the first n_threads-1 workers
        # (plain floor division; batch and n_threads are positive ints).
        data_thread = self.batch // n_threads
        # The last worker picks up whatever remains.
        rem_data_lth = self.batch - data_thread * (n_threads - 1)

        threads = []
        with ThreadPool(processes=n_threads) as pool:
            for p in range(n_threads):
                s_i = self.b_id + data_thread * p
                # Inclusive end index of this worker's slice.
                if rem_data_lth != 0 and p == n_threads - 1:
                    e_i = s_i + rem_data_lth - 1
                else:
                    e_i = s_i + data_thread - 1
                threads.append(pool.apply_async(
                    self.get_train_targ_data,
                    args=(file_path, charmap, noise_types,
                          frame_overlap_flag, s_i, e_i, p, data_thread)))

            # -------------------------------------------------------
            # Collapse results from the worker threads, in order.
            # -------------------------------------------------------
            frames = None
            seq_lens = []
            transcripts = []
            t_indices = []
            t_values = []
            t_shape = []
            for p in range(n_threads):
                (data, _, nframes, transcript,
                 targ_indices, targ_values, targ_shape) = threads[p].get()
                # Drop the AsyncResult so its payload can be freed early.
                threads[p] = None

                frames = self.append_tensor(frames, data)
                seq_lens.append(nframes)
                t_shape.append([targ_shape])
                transcripts.append(transcript)
                t_indices.append(targ_indices)
                t_values.append(targ_values)

            pool.close()
            pool.join()
        # BUG FIX: removed the original `pool._join_exited_workers()` call --
        # a private multiprocessing API that is redundant after close()+join()
        # and not guaranteed to exist across Python versions. The context
        # manager above also guarantees the pool is terminated.

        t_indices = np.concatenate(t_indices)
        t_values = np.concatenate(t_values)
        t_shape = np.concatenate(t_shape)
        # Dense target shape: full batch x longest target sequence.
        t_shape = [self.batch, np.max(t_shape[:, 1])]
        # Flatten the per-thread transcript lists into one batch-level list.
        transcripts = [t for sublist in transcripts for t in sublist]
        seq_lens = np.concatenate(seq_lens)
        self.b_id += self.batch
        return frames, transcripts, t_indices, t_values, t_shape, seq_lens