def encode(self, sents):
    batches = []
    for b in range(0, len(sents), self.chunck_size):
        # tokenize each sentence and truncate to max_length
        tokens = [
            self.tokenizer.encode(x)[:self.max_length]
            for x in sents[b:b + self.chunck_size]
        ]
        # pad to a common length and stack into a tensor
        tokens = torch.tensor(zero_padding(tokens)).transpose(0, 1)
        batches.append(tokens)
    # query the model batch by batch, keeping results on the CPU
    cpu = torch.device('cpu')
    out = []
    with torch.no_grad():
        for batch in tqdm(batches):
            batch = batch.to(self.device)
            hidden_states = self.model(batch)[0]
            # use the last hidden state as the sentence embedding
            out.append(hidden_states[:, -1, :].to(cpu).numpy())
    return np.concatenate(out, axis=0)
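The zero_padding helper these encode examples call is never shown. A minimal sketch of what it presumably does, assuming a pad id of 0 (the name and the tensor construction suggest right-padding every id list to the longest one in the chunk):

def zero_padding(token_lists, pad_id=0):
    # Hypothetical reconstruction: right-pad each id list with pad_id
    # so all lists share the length of the longest one.
    max_len = max(len(x) for x in token_lists)
    return [x + [pad_id] * (max_len - len(x)) for x in token_lists]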
Example #2
def encode(self, sents):
    batches = []
    for b in range(0, len(sents), self.chunck_size):
        # tokenize each sentence and truncate to max_length
        tokens = [
            self.tokenizer.tokenize(x)[:self.max_length]
            for x in sents[b:b + self.chunck_size]
        ]
        # map tokens to vocabulary ids
        tokens = [self.tokenizer.convert_tokens_to_ids(x) for x in tokens]
        # pad to a common length and stack into a tensor
        tokens = torch.tensor(zero_padding(tokens)).transpose(0, 1)
        batches.append(tokens)
    # query the model batch by batch, keeping results on the CPU
    cpu = torch.device('cpu')
    out = []
    with torch.no_grad():
        for batch in tqdm(batches):
            batch = batch.to(self.device)
            # how to use the embedding:
            # https://github.com/zihangdai/xlnet/blob/master/modeling.py
            # @func: summarize_sequences
            hidden_states = self.model(batch)[0]
            # use the last hidden state as the sentence embedding
            out.append(hidden_states[:, -1, :].to(cpu).numpy())
    return np.concatenate(out, axis=0)
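This example differs from the first encode example above only in the tokenization step. For HuggingFace-style tokenizers, the single encode call roughly collapses the two-step form used here; whether encode also inserts special tokens depends on the library version, so treat this as a sketch of the equivalence rather than a guarantee:

# Two-step form (this example):
ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(x))
# Roughly equivalent one-call form (first example); recent versions of
# transformers add special tokens by default unless told otherwise:
ids = tokenizer.encode(x, add_special_tokens=False)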
Example #3
def encode(self, sents):
    batches = []
    for b in range(0, len(sents), self.chunck_size):
        # tokenize each sentence and truncate to max_length;
        # only RoBERTa gets its special tokens added here
        tokens = [
            self.tokenizer.encode(
                x, add_special_tokens=(self.name == 'roberta'))[:self.max_length]
            for x in sents[b:b + self.chunck_size]
        ]
        # pad to a common length and stack into a tensor
        tokens = torch.tensor(zero_padding(tokens)).transpose(0, 1)
        batches.append(tokens)
    # query the model batch by batch, keeping results on the CPU
    cpu = torch.device('cpu')
    out = []
    with torch.no_grad():
        for batch in batches:
            batch = batch.to(self.device)
            hidden_states = self.model(batch)[0]
            # pool the hidden states into a single sentence embedding
            if self.pooling == 'mean':
                sent_emb = hidden_states.mean(dim=1)
            elif self.pooling == 'first':
                sent_emb = hidden_states[:, 0, :]
            else:
                sent_emb = hidden_states[:, -1, :]
            out.append(sent_emb.to(cpu).numpy())
    return np.concatenate(out, axis=0)
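One caveat in the 'mean' branch above: hidden_states.mean(dim=1) averages padded positions in as well, since zero_padding brings every sentence in a chunk to the same length. A masked mean avoids that; here is a sketch, under the assumptions that the pad id is 0 and that the input ids are laid out as (batch, seq_len) to match the (batch, seq_len, hidden) indexing the 'first'/'last' branches imply:

# Hypothetical masked mean; `ids` (batch, seq_len) are the input ids.
mask = (ids != 0).unsqueeze(-1).float()   # 1.0 at real tokens, 0.0 at padding
sent_emb = (hidden_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)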
Example #4
def compress_speck(path, dest_path, dec_level, bpp):
    if path[-1] != "/":
        path += "/"
    info = pickle.load(open(path + "info.dat", "rb"))
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)
    codec = sk.speck()
    info.wavelet = "cdf97"
    info.wavelet_level = dec_level
    info.frames = 3
    info.bpp = bpp
    for c in range(info.frames):
        frame = cv2.imread(path + str(c) + ".png", cv2.IMREAD_GRAYSCALE)
        info.cols = frame.shape[1]
        info.rows = frame.shape[0]
        # pad the frame so its dimensions suit the wavelet transform
        frame = tools.zero_padding(frame)
        info.wavelet_cols = frame.shape[1]
        info.wavelet_rows = frame.shape[0]
        wavelet = lwt.cdf97(frame, dec_level)
        wavelet = tools.quant(wavelet, 0.00001)
        coded_frame = codec.compress(wavelet, bpp)
        stream = dict()
        stream["wise_bit"] = coded_frame[3]
        stream["payload"] = coded_frame[4]
        try:
            pickle.dump(stream, open(dest_path + str(c) + ".speck", "wb"))
        except Exception:
            print("Failed to create: " + dest_path + str(c) + ".speck")
    pickle.dump(info, open(dest_path + "info.dat", "wb"))
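A hedged usage sketch for compress_speck; the directory names and the 0.5 bpp figure are made up, and the source directory must already contain the info.dat pickle the function loads. Note that only path gets a trailing slash appended automatically, so dest_path should end with one:

# Hypothetical call: 5-level CDF 9/7 decomposition at 0.5 bits per pixel.
compress_speck("frames/", "encoded/", dec_level=5, bpp=0.5)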
Example #5
def encode(self, sents):
    batches = []
    for b in range(0, len(sents), self.chunck_size):
        # tokenize each sentence and truncate to max_length
        tokens = [self.tokenizer.encode(x)[:self.max_length] for x in sents[b:b + self.chunck_size]]
        # pad to a common length and stack into a tensor
        tokens = torch.tensor(zero_padding(tokens)).transpose(0, 1)
        batches.append(tokens)
    # query the model batch by batch, keeping results on the CPU
    cpu = torch.device('cpu')
    out = []
    with torch.no_grad():
        for batch in tqdm(batches):
            batch = batch.to(self.device)
            hidden_states = self.model(batch)[0]
            # use the last hidden state as the sentence embedding
            out.append(hidden_states[:, -1, :].to(cpu).numpy())
    return np.concatenate(out, axis=0)
Example #6
def compress_motion_speck(path, dest_path, bpp, dec_level=4,
                          macroblock_size=8, fixed_keyframe=0):
    """Compress an image sequence from a directory using SPECK and
    motion compensation.

    Args:
        path: The directory where the image sequence to be encoded is stored
        dest_path: A destination directory where the encoded frames will be
                   stored
        bpp: Compression ratio in bits per pixel

    Kwargs:
        dec_level: Level of wavelet decomposition to be used
        macroblock_size: Size of the macroblock used for motion compensation
        fixed_keyframe: Size of the Group of Pictures

    """
    if path[-1] != "/":
        path += "/"
    info = pickle.load(open(path + "info.dat", "rb"))
    is_key = 0
    info.fixed_keyframe = fixed_keyframe
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)
    info.macroblock_size = macroblock_size
    codec = sk.speck()
    info.wavelet = "cdf97"
    info.wavelet_level = dec_level
    info.full_size = 100
    for c in range(info.frames):
        original_frame = cv2.imread(path + str(c) + ".png",
                                    cv2.IMREAD_GRAYSCALE)
        info.cols = original_frame.shape[1]
        info.rows = original_frame.shape[0]
        # pad the frame so its dimensions suit the wavelet transform
        frame = tools.zero_padding(original_frame)
        info.wavelet_cols = frame.shape[1]
        info.wavelet_rows = frame.shape[0]
        if is_key == 0:
            # intra-coded keyframe: transform, quantize and compress directly
            wavelet = lwt.cdf97(frame, dec_level)
            wavelet = tools.quant(wavelet, 0.0001)
            coded_frame = codec.compress(wavelet, bpp)
            stream = dict()
            stream["wise_bit"] = coded_frame[3]
            stream["payload"] = coded_frame[4]
            try:
                pickle.dump(stream, open(dest_path + str(c) + ".speck", "wb"))
            except Exception:
                print("Failed to create: " + dest_path + str(c) + ".speck")
            # decode the keyframe again so motion compensation uses the
            # same reference the decoder will have
            iwave = codec.expand(coded_frame[4], frame.shape[1],
                                 frame.shape[0], dec_level, coded_frame[3])
            iframe = lwt.icdf97(iwave)
            is_key = fixed_keyframe - 1
            key_frame = iframe
            info.motion_vectors += [0]
        else:
            # predicted frame: encode the motion-compensated residual
            p_frame, mvs = intraframe.encode_motion_frame(frame,
                                                          key_frame,
                                                          macroblock_size,
                                                          info.full_size)
            info.motion_vectors += [mvs]
            is_key -= 1
            wavelet = lwt.cdf97(p_frame, dec_level)
            wavelet = tools.quant(wavelet, 0.0001)
            coded_frame = codec.compress(wavelet, bpp)
            stream = dict()
            stream["wise_bit"] = coded_frame[3]
            stream["payload"] = coded_frame[4]
            try:
                pickle.dump(stream, open(dest_path + str(c) + ".speck", "wb"))
            except Exception:
                print("Failed to create: " + dest_path + str(c) + ".speck")
    pickle.dump(info, open(dest_path + "info.dat", "wb"))
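A hedged usage sketch for compress_motion_speck, with made-up directories and parameters. As the is_key bookkeeping above implies, the default fixed_keyframe=0 intra-codes only frame 0 and predicts every later frame from it; a positive value gives a fixed GOP size:

# Hypothetical call: keyframe every 8 frames, 16x16 macroblocks.
compress_motion_speck("frames/", "encoded/", bpp=0.5,
                      dec_level=4, macroblock_size=16, fixed_keyframe=8)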