def __init__(self, opt):
        """
        Attributes:
            _data (dict): 预处理之后的数据,包括所有图片的文件名,以及处理过后的描述
            all_imgs (tensor): 利用resnet50提取的图片特征,形状(200000,2048)
            caption(list): 长度为20万的list,包括每张图片的文字描述
            ix2id(dict): 指定序号的图片对应的文件名
            start_(int): 起始序号,训练集的起始序号是0,验证集的起始序号是190000,即
                前190000张图片是训练集,剩下的10000张图片是验证集
            len_(init): 数据集大小,如果是训练集,长度就是190000,验证集长度为10000
            traininig(bool): 是训练集(True),还是验证集(False)

            相当于从图片特征到文字的特征


        """
        self.opt = opt
        data = t.load(opt.caption_data_path)
        word2ix = data['word2ix']
        self.captions = data['caption']
        self.padding = word2ix.get(data.get('padding'))
        self.end = word2ix.get(data.get('end'))
        self._data = data
        self.ix2id = data['ix2id']
        self.all_imgs = t.load(opt.img_feature_path)
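
For orientation, a minimal sketch of the accessors such a dataset would typically pair with the __init__ above, based on the attribute docstring (start_/len_ splitting the 200,000 images into 190,000 train and 10,000 validation); the method bodies are an assumption, not the original implementation.

    # Sketch only -- these would sit on the same Dataset class; start_ and len_
    # are assumed to be set as described in the docstring above.
    def __len__(self):
        return self.len_

    def __getitem__(self, index):
        img = self.all_imgs[index + self.start_]      # (2048,) ResNet-50 feature
        caption = self.captions[index + self.start_]  # processed caption for this image
        return img, caption, index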
Example 2
 def process_line(cls, data, tokenizer, tokenize):
     prompts, texts = [], []
     text = data.get("title", "") + data.get("abstract", "") + data.get("content", "")
     if text:
         p, t = cls.process_sample("", tokenizer, tokenize), cls.process_sample(text, tokenizer, tokenize)
         prompts.append(p)
         texts.append(t)
     return prompts, texts
Example 3
 def __init__(self, opt):
     self.opt = opt
     data = t.load(opt.caption_data_path)
     word2ix = data['word2ix']
     self.padding = word2ix.get(data.get('padding'))
     self.end = word2ix.get(data.get('end'))
     self._data = data
     self.img_feats = t.load(opt.feats_path)
     self.train()
Example 4
 def __init__(self, opt, transforms=None, training=True):
     self.opt = opt
     self.training = training
     data = t.load(opt.caption_data_path)
     word2ix = data['word2ix']
     self.padding = word2ix.get(data.get('padding'))
     self.end = word2ix.get(data.get('end'))
     self._data = data
     self.normalize = tv.transforms.Normalize(mean=IMAGENET_MEAN,
                                              std=IMAGENET_STD)
     self.train()
Example 5
    def __init__(self, opt):

        self.opt = opt
        data = t.load(opt.caption_data_path)
        self.data = data
        self.captions = data['caption']
        self.word2ix = data['word2ix']
        self.padding = self.word2ix[data.get('padding')]
        self.end = self.word2ix[data.get('end')]
        self._data = data
        self.ix2word = data['ix2word']
        self.all_imgs = t.load(opt.img_feature_path)  # 200k*2048
Example 6
    def __init__(self, spec):
        self.mode = spec['mode'] # either 'train' or 'test'
        self.root = spec['root']
        self.images = None
        self.labels = None
        # self.transforms = spec['transforms'] # Already transformed (?)

        with h5py.File('usps.h5', 'r') as hf:
            data = hf.get(self.mode)
            self.images = data.get('data')[:]
            self.images = torch.as_tensor(self.images).reshape(-1, 1, 16, 16)
            self.labels = data.get('target')[:]
            self.labels = torch.as_tensor(self.labels)
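
The __init__ above only loads the arrays; a hedged sketch of the accessors it would need to work as a torch Dataset (the float/long casts are an assumption about how the USPS images and labels are consumed):

    # Sketch only: companion accessors for the USPS dataset loaded above.
    def __len__(self):
        return self.images.shape[0]

    def __getitem__(self, index):
        # images were reshaped to (N, 1, 16, 16) in __init__
        return self.images[index].float(), self.labels[index].long()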
Example 7
 def __init__(self):
     data = t.load('caption.pth')
     word2ix = data['word2ix']
     self.ix2word = data['ix2word']
     self.captions = data['caption']
     self.padding = word2ix.get(data.get('padding'))
     self.end = word2ix.get(data.get('end'))
     self._data = data
     self.ix2id = data['ix2id']
     all_low = t.load('results.pth')
     self.all_low = all_low
     all_pic_r = t.load('data_save/multi_label_extract_pic.pth')
     all_pic = all_pic_r['multi_label_extract_pic']
     self.all_pic = all_pic
     all_block_r = t.load('data_save/imageai_multi_label_extract_block.pth')
     all_block = all_block_r['multi_label_extract_block']
     self.all_block = all_block
Example 8
 def process_line(self, data, tokenizer, tokenize):
     text = data.get("text", None)
     if text:
         prompt, text = self.process_sample("", tokenizer,
                                            tokenize), self.process_sample(
                                                text, tokenizer, tokenize)
         return [prompt], [text]
     else:
         return [], []
Example 9
 def process_line(self, data, tokenizer, tokenize):
     text = ""
     title = data.get("title", None)
     description = data.get("description", None)
     maintext = data.get("maintext", None)
     if title:
         text += title.strip() + " "
     if description and (not maintext
                         or not maintext.startswith(description)):
         text += description.strip() + " "
     if maintext:
         text += maintext
     if len(text) > 100:
         prompt, text = self.process_sample("", tokenizer,
                                            tokenize), self.process_sample(
                                                text, tokenizer, tokenize)
         return [prompt], [text]
     else:
         return [], []
Example 10
 def __init__(self, opt, transforms=None):
     '''
     Attributes:
         _data (dict): preprocessed data, including every image's file name and its processed captions
         all_imgs (tensor): image features extracted with ResNet-50, shape (200000, 2048)
         captions (list): list of length 200,000 holding the text descriptions of each image
         ix2id (dict): maps an image index to its file name
         start_ (int): starting index; 0 for the training set and 190000 for the validation set,
             i.e. the first 190,000 images form the training set and the remaining 10,000 the validation set
         len_ (int): dataset size; 190000 for the training set and 10000 for the validation set
         training (bool): True for the training set, False for the validation set
     '''
     self.opt = opt
     data = t.load(opt.caption_data_path)
     word2ix = data['word2ix']
     self.captions = data['caption']
     self.padding = word2ix.get(data.get('padding'))
     self.end = word2ix.get(data.get('end'))
     self._data = data
     self.ix2id = data['ix2id']
     self.all_imgs = t.load(opt.img_feature_path)
Example 11
 def process_line(cls, data, tokenizer, tokenize):
     if "title" not in data:
         return [], []
     prompts, texts = [], []
     qtitle = data["title"]
     qcontent = data.get("content", "")
     qcontent = cls.trim_field(qcontent, max_length=100)
     prompt = cls.qtitle_prefix + qtitle + cls.qcontent_prefix + qcontent + cls.answer_prefix
     prompt = cls.process_sample(prompt, tokenizer, tokenize)
     if "best_answer" in data:
         text = data["best_answer"]["content"]
         if len(text) > 10:
             text = cls.process_sample(text, tokenizer, tokenize)
             prompts.append(prompt)
             texts.append(text)
     for answer in data.get("other_answers", []):
         text = answer["content"]
         if len(text) > 100:
             text = cls.process_sample(text, tokenizer, tokenize)
             prompts.append(prompt)
             texts.append(text)
     return prompts, texts
Example 12
 def process_line(cls, data, tokenizer, tokenize):
     prompts, texts = [], []
     ans_length = len(data.get("ans-content", ""))
     ans_up = data.get("ans-up-num", "")
     ans_up = int(ans_up) if ans_up else 0
     if ans_length > 100 or ans_up > 1000:
         qtitle = data["q_title"]
         qcontent = data["q-content"]
         if qcontent is None:
             qcontent = ""
         qcontent = cls.trim_field(qcontent, max_length=100)
         user = data.get("user-signature", "")
         prompt = cls.qtitle_prefix + qtitle + cls.qcontent_prefix + qcontent + cls.user_prefix + user + cls.answer_prefix
         text = data["ans-content"]
         prompt, text = cls.process_sample(prompt, tokenizer, tokenize), cls.process_sample(text, tokenizer,
                                                                                            tokenize)
         prompts.append(prompt)
         texts.append(text)
     # prompt = data["q_title"] + data["q-content"] + data["user-signature"]
     # text = data["ans-content"]
     # prompts.append(prompt)
     # texts.append(text)
     return prompts, texts
Example 13
 def process_line(self, data, tokenizer, tokenize):
     source = data["meta"].get("pile_set_name", None)
     text = data.get("text", None)
     if source and text:
         if source in self.filtered_sources:
             return [], [], None
         elif source in self.downsample_sources and random.random(
         ) > self.downsample_sources[source]:
             return [], [], None
         else:
             prompt, text = self.process_sample(
                 "", tokenizer,
                 tokenize), self.process_sample(text, tokenizer, tokenize)
             return [prompt], [text], source
     else:
         return [], [], None
Example 14
    def read_normalize(self):
        try:
            with open(self.path + "\\dataSetJson.json", "r") as json_file:
                data = json.load(json_file)
            print(self.path + "\\dataSetJson.json")
            if data.get('normalize') is not None:
                norm = data['normalize']
                if norm.get('mean') is not None and norm.get('dev_std') is not None:
                    self.mean = tuple(norm['mean'])
                    print(self.mean)
                    self.dev_std = tuple(norm['dev_std'])
                    print(self.dev_std)

                    self.transform = transforms.Compose([
                        transforms.Resize(self.resize),
                        transforms.ToTensor(),
                        transforms.Normalize(self.mean, self.dev_std)
                    ])

        except Exception:
            sys.stderr.write("Error: could not read normalization settings\n")
            exit(1)
Example 15
def eval_split(model, crit, loader, args):
    verbose = True
    num_images = 10
    split = 'val'
    lang_eval = 0
    dataset = 'coco'
    beam_size = 1

    model.eval()
    loader.reset_iterator(split)

    n = 0
    loss = 0
    loss_sum = 0
    loss_evals = 1e-8
    predictions = []
    while True:
        data = loader.get_batch(split)
        n = n + loader.batch_size
        if data.get('labels', None) is not None:
            tmp = [
                data['fc_feats'], data['att_feats'], data['labels'],
                data['masks']
            ]
            tmp = [
                Variable(torch.from_numpy(_), volatile=True).cuda()
                for _ in tmp
            ]
            fc_feats, att_feats, labels, masks = tmp
            loss = crit(model(fc_feats, att_feats, labels), labels[:, 1:],
                        masks[:, 1:]).data[0]
            loss_sum = loss_sum + loss
            loss_evals = loss_evals + 1
        tmp = [
            data['fc_feats'][np.arange(loader.batch_size) *
                             loader.seq_per_img],
            data['att_feats'][np.arange(loader.batch_size) *
                              loader.seq_per_img]
        ]
        tmp = [
            Variable(torch.from_numpy(_), volatile=True).cuda() for _ in tmp
        ]
        fc_feats, att_feats = tmp
        seq, _ = model.sample(fc_feats, att_feats)
        sents = decode_sequence(loader.get_vocab(), seq)
        for k, sent in enumerate(sents):
            entry = {'image_id': data['infos'][k]['id'], 'caption': sent}
            entry['file_name'] = data['infos'][k]['file_path']
            predictions.append(entry)
            cmd = 'cp "' + os.path.join(
                args.data_dir,
                data['infos'][k]['file_path']) + '" vis/imgs/img' + str(
                    len(predictions)) + '.jpg'
            #print(cmd)
            print('image %s: %s' % (entry['image_id'], entry['caption']))
            os.system(cmd)
        ix0 = data['bounds']['it_pos_now']
        ix1 = data['bounds']['it_max']
        if num_images != -1:
            ix1 = min(ix1, num_images)
        for i in range(n - ix1):
            predictions.pop()
        if data['bounds']['new_epoch']:
            break
        if num_images >= 0 and n >= num_images:
            break
    lang_stats = language_eval(predictions)
    return lang_stats
Example 16
def go_eval(netG, dataset_dt, dt_kw=[], g_kw={}, crop_size=74):

    scale = None

    def _overlap_crop_forward(x, shave=10, min_size=100000, bic=None):
        """
		chop for less memory consumption during test
		"""
        n_GPUs = 2
        #scale = self.scale
        b, c, h, w = x.size()
        h_half, w_half = h // 2, w // 2
        h_size, w_size = h_half + shave, w_half + shave
        lr_list = [
            x[:, :, 0:h_size, 0:w_size], x[:, :, 0:h_size, (w - w_size):w],
            x[:, :, (h - h_size):h, 0:w_size], x[:, :, (h - h_size):h,
                                                 (w - w_size):w]
        ]

        if bic is not None:
            bic_h_size = h_size * scale
            bic_w_size = w_size * scale
            bic_h = h * scale
            bic_w = w * scale

            bic_list = [
                bic[:, :, 0:bic_h_size, 0:bic_w_size],
                bic[:, :, 0:bic_h_size, (bic_w - bic_w_size):bic_w],
                bic[:, :, (bic_h - bic_h_size):bic_h,
                    0:bic_w_size], bic[:, :, (bic_h - bic_h_size):bic_h,
                                       (bic_w - bic_w_size):bic_w]
            ]

        if w_size * h_size < min_size:
            sr_list = []
            for i in range(0, 4, n_GPUs):
                lr_batch = torch.cat(lr_list[i:(i + n_GPUs)], dim=0)
                if bic is not None:
                    bic_batch = torch.cat(bic_list[i:(i + n_GPUs)], dim=0)

                sr_batch_temp = netG(lr_batch, scale=4)

                if isinstance(sr_batch_temp, list):
                    sr_batch = sr_batch_temp[-1]
                else:
                    sr_batch = sr_batch_temp

                sr_list.extend(sr_batch.chunk(n_GPUs, dim=0))
        else:
            sr_list = [
             _overlap_crop_forward(patch, shave=shave, min_size=min_size) \
             for patch in lr_list
             ]

        h, w = scale * h, scale * w
        h_half, w_half = scale * h_half, scale * w_half
        h_size, w_size = scale * h_size, scale * w_size
        shave *= scale

        output = x.new(b, c, h, w)
        output[:, :, 0:h_half, 0:w_half] \
         = sr_list[0][:, :, 0:h_half, 0:w_half]
        output[:, :, 0:h_half, w_half:w] \
         = sr_list[1][:, :, 0:h_half, (w_size - w + w_half):w_size]
        output[:, :, h_half:h, 0:w_half] \
         = sr_list[2][:, :, (h_size - h + h_half):h_size, 0:w_half]
        output[:, :, h_half:h, w_half:w] \
         = sr_list[3][:, :, (h_size - h + h_half):h_size, (w_size - w + w_half):w_size]

        return output

    dl = torch.utils.data.DataLoader(dataset_dt, batch_size=1)
    psnr_list = []
    ssim_list = []
    for i, data in enumerate(dl):
        if not dt_kw:  # covers both None and the empty-list default
            lr, hr = data
        else:
            lr, hr = data.get(dt_kw[0]), data.get(dt_kw[1])

        scale = g_kw.get("scale", 4)
        o_hr = _overlap_crop_forward(lr)
        assert (o_hr.shape == hr.shape)
        o_hr = Tensor2np(o_hr[0].cpu())
        hr = Tensor2np(hr[0].cpu())

        psnr, ssim = metric.calc_metrics(o_hr, hr, crop_border=scale)

        psnr_list.append(psnr)
        ssim_list.append(ssim)

    print("psnr:", np.mean(psnr_list), "ssim:", np.mean(ssim_list))
Example 17
 def row(self, data):
     # Truncate each value to its configured column width before formatting
     return self.fmt.format(**{
         k: str(data.get(k, ''))[:w]
         for k, w in self.width.items()
     })
Example 18
    def __getitem__(self, indexs):
        if self.data is None:
            self.data = np.load(self.hparams.ds_name + '.npz',
                                allow_pickle=True)

        ret = []
        if not isinstance(indexs, list):
            _indexs = [indexs]
        else:
            _indexs = indexs
        for index in _indexs:
            data = self.data[self.audio_and_text_keys[index]].item()
            text = np.array(data['text'], np.int)

            if self.hparams.multispeaker:
                spk_id = data.get('spk_id', 0)
            else:
                spk_id = 0

            if self.hparams.add_sil == 2:
                text = text.reshape(-1,
                                    3 if not self.hparams.use_pinyin else 2)
                text = np.concatenate(
                    [text, 2 * np.ones([text.shape[0], 1], np.int)],
                    -1)  # [L, 4]
                text = text.reshape(-1)  # [L + L//3]
            elif self.hparams.add_sil == 3:
                text = np.stack([text, 128 + text, 256 + text], -1)  # [L, 3]
                text = text.reshape(
                    -1, 9 if not self.hparams.use_pinyin else 2)  # [L/3, 9]
                text = np.concatenate(
                    [text, 2 * np.ones([text.shape[0], 1], np.int)],
                    -1)  # [L/3, 10]
                text = text.reshape(-1)  # [10L/3]

            text = torch.from_numpy(text)
            mel = torch.from_numpy(np.array(data['mels']).reshape(-1, 80).T)
            if self.hparams.use_linear or self.hparams.linear_directly:
                linear = torch.from_numpy(
                    np.array(data['linear']).reshape(-1,
                                                     self.hparams.num_freq).T)
            else:
                linear = None

            if self.hparams.speech and self.type != 'val':
                mel = mel[:, :1550]
                text = text[:350]
                if linear is not None:  # truth-testing a multi-element tensor raises an error
                    linear = linear[:1550]

            pitch = None
            if self.hparams.use_pitch:
                pitch_key = 'pitches' if not self.hparams.use_smooth_pitch else 'smooth_pitches'
                pitch = torch.from_numpy(np.array(data[pitch_key], np.int))

            utt_ids = torch.from_numpy(np.array(data['utt_id'], np.int))
            if self.hparams.prefix_len > 0:
                text_len = int(self.hparams.prefix_len * text.shape[0] /
                               mel.shape[1])
                text = text[:text_len]
                mel = mel[:, :self.hparams.prefix_len]
                pitch = pitch[:self.hparams.prefix_len]

            attn = None
            if self.hparams.use_ali or self.hparams.use_ali_mask:
                attn = np.zeros((mel.shape[1], text.shape[0]))
                if self.hparams.use_phoneme_align:
                    mel_splits = [
                        int(x * self.hparams.audio_sample_rate /
                            self.hparams.hop_size) for x in data['splits']
                    ]
                    last = 0
                    for t_idx, s in enumerate(mel_splits):
                        attn[last:s, t_idx] = 1
                else:
                    splits_begin = np.clip(np.array(data['splits'], np.int), 0,
                                           mel.shape[1] - 1)
                    splits_end = np.clip(np.array(data['splits_end'], np.int),
                                         0, mel.shape[1] - 1)
                    splits_begin = [0] + list(splits_begin)
                    splits_end = [0] + list(splits_end)
                    if not self.hparams.use_ali_mask2:  # TODO: PINYIN?
                        if self.hparams.use_pinyin:
                            for i in range(text.shape[0] // 3):
                                splits_begin_step = (splits_begin[i + 1] -
                                                     splits_begin[i] - 3) / 2
                                if self.hparams.attn_step_clip10:
                                    splits_begin_step = np.clip(
                                        splits_begin_step, 0, 10)
                                attn[int(splits_begin[i]
                                         ):int(splits_begin[i] +
                                               splits_begin_step), i * 3] += 1
                                attn[int(splits_begin[i] + splits_begin_step
                                         ):int(splits_begin[i] +
                                               splits_begin_step * 2),
                                     i * 3 + 1] += 1
                                attn[int(splits_begin[i + 1]) -
                                     3:int(splits_begin[i + 1]),
                                     i * 3 + 2] += 1
                        else:
                            if self.hparams.add_sil == 0:
                                for i in range(text.shape[0] // 3):
                                    splits_begin_step = (splits_begin[i + 1] -
                                                         splits_begin[i]) / 3
                                    splits_end_step = (splits_end[i + 1] -
                                                       splits_end[i]) / 3
                                    if self.hparams.attn_step_clip10:
                                        splits_begin_step = np.clip(
                                            splits_begin_step, 0, 10)
                                        splits_end_step = np.clip(
                                            splits_end_step, 0, 10)
                                    attn[int(splits_begin[i]
                                             ):int(splits_begin[i] +
                                                   splits_begin_step),
                                         i * 3] += 0.5

                                    attn[int(splits_begin[i] +
                                             splits_begin_step
                                             ):int(splits_begin[i] +
                                                   splits_begin_step * 2),
                                         i * 3 + 1] += 0.5

                                    attn[int(splits_begin[i] +
                                             splits_begin_step *
                                             2):int(splits_begin[i + 1]),
                                         i * 3 + 2] += 0.5

                                    attn[int(splits_end[i]
                                             ):int(splits_end[i] +
                                                   splits_end_step),
                                         i * 3] += 0.5

                                    attn[int(splits_end[i] + splits_end_step
                                             ):int(splits_end[i] +
                                                   splits_end_step * 2),
                                         i * 3 + 1] += 0.5

                                    attn[int(splits_end[i] + splits_end_step *
                                             2):int(splits_end[i + 1]),
                                         i * 3 + 2] += 0.5
                            elif self.hparams.add_sil == 2:
                                for i in range(text.shape[0] // 4):
                                    splits_begin_step = (splits_begin[i + 1] -
                                                         splits_begin[i] -
                                                         3) / 3
                                    splits_end_step = (splits_end[i + 1] -
                                                       splits_end[i] - 3) / 3
                                    if self.hparams.attn_step_clip10:
                                        splits_begin_step = np.clip(
                                            splits_begin_step, 0, 10)
                                        splits_end_step = np.clip(
                                            splits_end_step, 0, 10)

                                    attn[int(splits_begin[i]
                                             ):int(splits_begin[i] +
                                                   splits_begin_step),
                                         i * 4] += 0.5
                                    attn[int(splits_begin[i] +
                                             splits_begin_step
                                             ):int(splits_begin[i] +
                                                   splits_begin_step * 2),
                                         i * 4 + 1] += 0.5
                                    attn[int(splits_begin[i] +
                                             splits_begin_step *
                                             2):int(splits_begin[i + 1]) - 3,
                                         i * 4 + 2] += 0.5
                                    attn[int(splits_begin[i + 1]) -
                                         3:int(splits_begin[i + 1]),
                                         i * 4 + 3] += 0.5

                                    attn[int(splits_end[i]
                                             ):int(splits_end[i] +
                                                   splits_end_step),
                                         i * 4] += 0.5
                                    attn[int(splits_end[i] + splits_end_step
                                             ):int(splits_end[i] +
                                                   splits_end_step * 2),
                                         i * 4 + 1] += 0.5
                                    attn[int(splits_end[i] + splits_end_step *
                                             2):int(splits_end[i + 1]) - 3,
                                         i * 4 + 2] += 0.5
                                    attn[int(splits_end[i + 1]) -
                                         3:int(splits_end[i + 1]),
                                         i * 4 + 3] += 0.5
                    else:
                        for i in range(text.shape[0] // 3):
                            attn[int(splits_begin[i]):int(splits_begin[i + 1]),
                                 i * 3:(i + 1) * 3] = 1
                            attn[int(splits_end[i]):int(splits_end[i + 1]),
                                 i * 3:(i + 1) * 3] = 1
                attn = torch.from_numpy(attn)

            if self.hparams.use_wavenet:
                wav = torch.from_numpy(np.array(data['raw_wav']))
                max_time_steps = self.hparams.wavenet_max_time
                if wav.shape[0] > max_time_steps:
                    max_time_frames = max_time_steps // audio.get_hop_size(
                        self.hparams)
                    start_cond_idx = torch.randint(
                        mel.shape[1] - max_time_frames, [])
                else:
                    start_cond_idx = 0
            else:
                wav = None
                start_cond_idx = None
            if self.hparams.linear_directly:
                mel = linear
            ret.append([
                text, mel, pitch, utt_ids, attn, linear, spk_id, wav,
                start_cond_idx
            ])
        if not isinstance(indexs, list):
            return ret[0]
        else:
            return ret
Example 19

model = torchvision.models.resnet152(pretrained=False).to(device)
# model = torch.load(MODEL).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
EPOCHS = 50
net = model.train()
for epoch in range(EPOCHS):
    print("Epoch = ", epoch)
    i = 0
    cum_loss = 0
    total_train = 0
    correct_train = 0
    for data in trainset:
        X = data.get("image").to(device)
        y = data.get("solution").to(device)
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, y.long())
        loss.backward()
        optimizer.step()
        i += BATCH_SIZE
        cum_loss += loss.item()  # accumulate a plain float, not the graph-attached tensor
        _, prediction = torch.max(output.data, 1)
        print(loss.item())
        print(i)
        total_train += y.nelement()
        correct_train += prediction.eq(y.data).sum().item()
        train_accuracy = 100 * correct_train / total_train
    print("Accuracy = ",train_accuracy, "%")