def encode(self, sents):
    """Embed sentences with ``self.model``, one vector per sentence.

    The sentences are processed in chunks of ``self.chunck_size``. Every
    sentence is converted to ids by ``self.tokenizer.encode`` and clipped to
    ``self.max_length`` ids; each chunk is padded by ``zero_padding`` and
    turned into a tensor. The model is run without gradient tracking and the
    hidden state at the last sequence position is kept for every sentence.

    Args:
        sents: Sequence of sentences accepted by ``self.tokenizer.encode``.

    Returns:
        A numpy array with the per-chunk results concatenated along axis 0.

    Raises:
        ValueError: If ``sents`` is empty (``np.concatenate`` receives no
            arrays).
    """
    batches = []
    for start in range(0, len(sents), self.chunck_size):
        chunk = sents[start:start + self.chunck_size]
        token_ids = [self.tokenizer.encode(s)[:self.max_length] for s in chunk]
        # The padded lists are transposed before use; presumably zero_padding
        # returns them position-major -- TODO confirm against its definition.
        batches.append(torch.tensor(zero_padding(token_ids)).transpose(0, 1))

    out = []
    with torch.no_grad():
        for batch in tqdm(batches):
            hidden_states = self.model(batch.to(self.device))[0]
            # NOTE(review): no attention mask is passed and position -1 is
            # taken for every row; if zero_padding pads on the right this is
            # a padding slot for the shorter sentences -- confirm.
            out.append(hidden_states[:, -1, :].cpu().numpy())
    return np.concatenate(out, axis=0)
def encode(self, sents):
    """Embed sentences with ``self.model``, one vector per sentence.

    The sentences are processed in chunks of ``self.chunck_size``. Every
    sentence is split by ``self.tokenizer.tokenize``, clipped to
    ``self.max_length`` tokens and then mapped to ids with
    ``convert_tokens_to_ids``; each chunk is padded by ``zero_padding`` and
    turned into a tensor. The model is run without gradient tracking and the
    hidden state at the last sequence position is kept for every sentence.

    Args:
        sents: Sequence of sentences accepted by ``self.tokenizer.tokenize``.

    Returns:
        A numpy array with the per-chunk results concatenated along axis 0.

    Raises:
        ValueError: If ``sents`` is empty (``np.concatenate`` receives no
            arrays).
    """
    batches = []
    for start in range(0, len(sents), self.chunck_size):
        chunk = sents[start:start + self.chunck_size]
        # Clipping happens on tokens, before they are converted to ids.
        clipped = [self.tokenizer.tokenize(s)[:self.max_length] for s in chunk]
        token_ids = [self.tokenizer.convert_tokens_to_ids(t) for t in clipped]
        # The padded lists are transposed before use; presumably zero_padding
        # returns them position-major -- TODO confirm against its definition.
        batches.append(torch.tensor(zero_padding(token_ids)).transpose(0, 1))

    out = []
    with torch.no_grad():
        for batch in tqdm(batches):
            hidden_states = self.model(batch.to(self.device))[0]
            # How to use the embedding:
            # https://github.com/zihangdai/xlnet/blob/master/modeling.py
            # @func: summarize_sequences
            # NOTE(review): no attention mask is passed and position -1 is
            # taken for every row; if zero_padding pads on the right this is
            # a padding slot for the shorter sentences -- confirm.
            out.append(hidden_states[:, -1, :].cpu().numpy())
    return np.concatenate(out, axis=0)
def encode(self, sents):
    """Embed sentences with ``self.model`` using the configured pooling.

    The sentences are processed in chunks of ``self.chunck_size``. Every
    sentence is converted to ids by ``self.tokenizer.encode`` (special tokens
    are added only when ``self.name == 'roberta'``) and clipped to
    ``self.max_length`` ids; each chunk is padded by ``zero_padding`` and
    turned into a tensor. The model is run without gradient tracking and its
    first output is pooled over dimension 1 according to ``self.pooling``:

    * ``'mean'``  -- average over all positions,
    * ``'first'`` -- hidden state at position 0,
    * anything else -- hidden state at the last position.

    Args:
        sents: Sequence of sentences accepted by ``self.tokenizer.encode``.

    Returns:
        A numpy array with the per-chunk results concatenated along axis 0.

    Raises:
        ValueError: If ``sents`` is empty (``np.concatenate`` receives no
            arrays).
    """
    use_special_tokens = self.name == 'roberta'
    batches = []
    for start in range(0, len(sents), self.chunck_size):
        chunk = sents[start:start + self.chunck_size]
        token_ids = [
            self.tokenizer.encode(
                s, add_special_tokens=use_special_tokens)[:self.max_length]
            for s in chunk
        ]
        # The padded lists are transposed before use; presumably zero_padding
        # returns them position-major -- TODO confirm against its definition.
        batches.append(torch.tensor(zero_padding(token_ids)).transpose(0, 1))

    out = []
    with torch.no_grad():
        for batch in batches:
            hidden_states = self.model(batch.to(self.device))[0]
            # NOTE(review): no attention mask is passed, so 'mean' averages
            # over padded positions too and the last-position fallback may
            # read a padding slot for shorter sentences -- confirm.
            if self.pooling == 'mean':
                sent_emb = hidden_states.mean(dim=1)
            elif self.pooling == 'first':
                sent_emb = hidden_states[:, 0, :]
            else:
                sent_emb = hidden_states[:, -1, :]
            out.append(sent_emb.cpu().numpy())
    return np.concatenate(out, axis=0)
def compress_speck(path, dest_path, dec_level, bpp):
    """Compress the frames of an image sequence with the SPECK codec.

    Reads ``info.dat`` and ``<n>.png`` from ``path``, wavelet-transforms each
    frame with CDF 9/7, quantises it, encodes it with SPECK and pickles the
    result to ``<n>.speck`` inside ``dest_path``. The updated ``info`` object
    is pickled to ``info.dat`` in ``dest_path`` at the end.

    Args:
        path: Directory holding ``info.dat`` and the ``<n>.png`` frames.
        dest_path: Output directory; created when it does not exist.
        dec_level: Number of wavelet decomposition levels.
        bpp: Bits-per-pixel budget handed to the codec.
    """
    # os.path.join is used for every file name so that both directories work
    # with or without a trailing slash.
    # NOTE(review): pickle executes code from the file it loads; info.dat
    # must come from a trusted source.
    # NOTE(review): info.dat is opened in text mode as in the original code;
    # Python 3's pickle needs "rb"/"wb" here.
    with open(os.path.join(path, "info.dat"), "r") as info_file:
        info = pickle.load(info_file)
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)

    codec = sk.speck()
    info.wavelet = "cdf97"
    info.wavelet_level = dec_level
    # NOTE(review): the frame count from info.dat is overwritten, so exactly
    # frames 0..2 are encoded -- confirm that this is intended.
    info.frames = 3
    info.bpp = bpp

    for c in range(info.frames):
        frame = cv2.imread(os.path.join(path, str(c) + ".png"),
                           cv2.CV_LOAD_IMAGE_GRAYSCALE)
        # Remember the original size before padding changes the shape.
        info.cols = frame.shape[1]
        info.rows = frame.shape[0]
        frame = tools.zero_padding(frame)
        info.wavelet_cols = frame.shape[1]
        info.wavelet_rows = frame.shape[0]

        wavelet = lwt.cdf97(frame, dec_level)
        wavelet = tools.quant(wavelet, 0.00001)
        coded_frame = codec.compress(wavelet, bpp)
        stream = {"wise_bit": coded_frame[3], "payload": coded_frame[4]}

        stream_file = os.path.join(dest_path, str(c) + ".speck")
        # Best effort: a frame that cannot be written is reported and skipped.
        # `except Exception` keeps Ctrl-C and SystemExit working.
        try:
            with open(stream_file, "wb") as out_file:
                pickle.dump(stream, out_file)
        except Exception:
            print("Failed to create: " + stream_file)

    with open(os.path.join(dest_path, "info.dat"), "w") as info_file:
        pickle.dump(info, info_file)
def encode(self, sents):
    """Embed sentences with ``self.model``, one vector per sentence.

    The sentences are processed in chunks of ``self.chunck_size``. Every
    sentence is converted to ids by ``self.tokenizer.encode`` and clipped to
    ``self.max_length`` ids; each chunk is padded by ``zero_padding`` and
    turned into a tensor. The model is run without gradient tracking and the
    hidden state at the last sequence position is kept for every sentence.

    Args:
        sents: Sequence of sentences accepted by ``self.tokenizer.encode``.

    Returns:
        A numpy array with the per-chunk results concatenated along axis 0.

    Raises:
        ValueError: If ``sents`` is empty (``np.concatenate`` receives no
            arrays).
    """
    batches = []
    for start in range(0, len(sents), self.chunck_size):
        chunk = sents[start:start + self.chunck_size]
        token_ids = [self.tokenizer.encode(s)[:self.max_length] for s in chunk]
        # The padded lists are transposed before use; presumably zero_padding
        # returns them position-major -- TODO confirm against its definition.
        batches.append(torch.tensor(zero_padding(token_ids)).transpose(0, 1))

    out = []
    with torch.no_grad():
        for batch in tqdm(batches):
            hidden_states = self.model(batch.to(self.device))[0]
            # NOTE(review): no attention mask is passed and position -1 is
            # taken for every row; if zero_padding pads on the right this is
            # a padding slot for the shorter sentences -- confirm.
            out.append(hidden_states[:, -1, :].cpu().numpy())
    return np.concatenate(out, axis=0)
def compress_motion_speck(path, dest_path, bpp, dec_level=4,
                          macroblock_size=8, fixed_keyframe=0):
    """Compress an image sequence with SPECK and motion compensation.

    Reads ``info.dat`` and ``<n>.png`` from ``path``. Key frames are
    wavelet-coded directly; every other frame is first turned into a
    motion-compensated frame against the last *decoded* key frame and then
    wavelet-coded. Each coded frame is pickled to ``<n>.speck`` inside
    ``dest_path`` and the updated ``info`` object (including the collected
    motion vectors) is pickled to ``info.dat`` in ``dest_path``.

    Args:
        path: The directory where the image sequence to be encoded is stored.
        dest_path: Destination directory for the encoded frames; created when
            it does not exist.
        bpp: Compression ratio in bits per pixel.

    Kwargs:
        dec_level: Level of wavelet decomposition to be used.
        macroblock_size: Size of the macroblock used for motion compensation.
        fixed_keyframe: Size of the group of pictures. Frame 0 is always a
            key frame; for a value ``k > 0`` every ``k``-th frame is a key
            frame, for ``0`` no further key frame is produced.
    """
    # os.path.join is used for every file name so that both directories work
    # with or without a trailing slash.
    # NOTE(review): pickle executes code from the file it loads; info.dat
    # must come from a trusted source.
    # NOTE(review): info.dat is opened in text mode as in the original code;
    # Python 3's pickle needs "rb"/"wb" here.
    with open(os.path.join(path, "info.dat"), "r") as info_file:
        info = pickle.load(info_file)
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)

    info.fixed_keyframe = fixed_keyframe
    info.macroblock_size = macroblock_size
    info.wavelet = "cdf97"
    info.wavelet_level = dec_level
    info.full_size = 100
    codec = sk.speck()

    def encode_and_store(image, index):
        # Wavelet-code one image, write <index>.speck, return the codec output.
        wavelet = lwt.cdf97(image, dec_level)
        wavelet = tools.quant(wavelet, 0.0001)
        coded = codec.compress(wavelet, bpp)
        stream = {"wise_bit": coded[3], "payload": coded[4]}
        stream_file = os.path.join(dest_path, str(index) + ".speck")
        # Best effort: a frame that cannot be written is reported and skipped.
        # `except Exception` keeps Ctrl-C and SystemExit working.
        try:
            with open(stream_file, "wb") as out_file:
                pickle.dump(stream, out_file)
        except Exception:
            print("Failed to create: " + stream_file)
        return coded

    # Countdown to the next key frame; a key frame is coded when it is 0.
    frames_until_key = 0
    key_frame = None
    for c in range(info.frames):
        original_frame = cv2.imread(os.path.join(path, str(c) + ".png"),
                                    cv2.CV_LOAD_IMAGE_GRAYSCALE)
        # Remember the original size before padding changes the shape.
        info.cols = original_frame.shape[1]
        info.rows = original_frame.shape[0]
        frame = tools.zero_padding(original_frame)
        info.wavelet_cols = frame.shape[1]
        info.wavelet_rows = frame.shape[0]

        if frames_until_key == 0:
            coded_frame = encode_and_store(frame, c)
            # Decode the key frame again so that prediction uses the
            # reconstructed image rather than the pristine source frame.
            iwave = codec.expand(coded_frame[4], frame.shape[1],
                                 frame.shape[0], dec_level, coded_frame[3])
            key_frame = lwt.icdf97(iwave)
            frames_until_key = fixed_keyframe - 1
            # Key frames get a 0 entry in the motion-vector list.
            info.motion_vectors += [0]
        else:
            p_frame, mvs = intraframe.encode_motion_frame(
                frame, key_frame, macroblock_size, info.full_size)
            info.motion_vectors += [(mvs)]
            frames_until_key -= 1
            encode_and_store(p_frame, c)

    with open(os.path.join(dest_path, "info.dat"), "w") as info_file:
        pickle.dump(info, info_file)