Esempio n. 1
0
 def get_text(self, text):
     text = text_to_sequence(text, self.char2idx)
     text_norm = torch.IntTensor(text)
     return text_norm
Esempio n. 2
0
def validation(model, criterion, evaluation_loader, converter, opt):
    """ validation or evaluation """
    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()

    for i, (image_tensors, labels) in enumerate(evaluation_loader):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        image = image_tensors.to(device)
        # For max length prediction
        length_for_pred = torch.IntTensor([opt["batch_max_length"]] *
                                          batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt["batch_max_length"] +
                                         1).fill_(0).to(device)

        text_for_loss, length_for_loss = converter.encode(
            labels, batch_max_length=opt["batch_max_length"])

        start_time = time.time()
        if 'CTC' in opt["Prediction"]:
            preds = model(image, text_for_pred)
            forward_time = time.time() - start_time

            # Calculate evaluation loss for CTC deocder.
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            # permute 'preds' to use CTCloss format
            if opt.baiduCTC:
                cost = criterion(preds.permute(1, 0, 2), text_for_loss,
                                 preds_size, length_for_loss) / batch_size
            else:
                cost = criterion(
                    preds.log_softmax(2).permute(1, 0, 2), text_for_loss,
                    preds_size, length_for_loss)

            # Select max probabilty (greedy decoding) then decode index to character
            if opt.baiduCTC:
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
            else:
                _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index.data, preds_size.data)

        else:
            preds = model(image, text_for_pred, is_train=False)
            forward_time = time.time() - start_time

            preds = preds[:, :text_for_loss.shape[1] - 1, :]
            target = text_for_loss[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.contiguous().view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

            # select max probabilty (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
            labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # calculate accuracy & confidence score
        preds_prob = F.softmax(preds, dim=2)
        preds_max_prob, _ = preds_prob.max(dim=2)
        confidence_score_list = []
        for gt, pred, pred_max_prob in zip(labels, preds_str, preds_max_prob):
            if 'Attn' in opt["Prediction"]:
                gt = gt[:gt.find('[s]')]
                pred_EOS = pred.find('[s]')
                pred = pred[:
                            pred_EOS]  # prune after "end of sentence" token ([s])
                pred_max_prob = pred_max_prob[:pred_EOS]

            # To evaluate 'case sensitive model' with alphanumeric and case insensitve setting.
            if opt["sensitive"] and opt.data_filtering_off:
                pred = pred.lower()
                gt = gt.lower()
                alphanumeric_case_insensitve = '0123456789abcdefghijklmnopqrstuvwxyz'
                out_of_alphanumeric_case_insensitve = f'[^{alphanumeric_case_insensitve}]'
                pred = re.sub(out_of_alphanumeric_case_insensitve, '', pred)
                gt = re.sub(out_of_alphanumeric_case_insensitve, '', gt)

            if pred == gt:
                n_correct += 1
            '''
            (old version) ICDAR2017 DOST Normalized Edit Distance https://rrc.cvc.uab.es/?ch=7&com=tasks
            "For each word we calculate the normalized edit distance to the length of the ground truth transcription."
            if len(gt) == 0:
                norm_ED += 1
            else:
                norm_ED += edit_distance(pred, gt) / len(gt)
            '''

            # ICDAR2019 Normalized Edit Distance
            if len(gt) == 0 or len(pred) == 0:
                norm_ED += 0
            elif len(gt) > len(pred):
                norm_ED += 1 - edit_distance(pred, gt) / len(gt)
            else:
                norm_ED += 1 - edit_distance(pred, gt) / len(pred)

            # calculate confidence score (= multiply of pred_max_prob)
            try:
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
            except:
                confidence_score = 0  # for empty pred case, when prune after "end of sentence" token ([s])
            confidence_score_list.append(confidence_score)
            # print(pred, gt, pred==gt, confidence_score)

    accuracy = n_correct / float(length_of_data) * 100
    norm_ED = norm_ED / float(
        length_of_data)  # ICDAR2019 Normalized Edit Distance

    return valid_loss_avg.val(
    ), accuracy, norm_ED, preds_str, confidence_score_list, labels, infer_time, length_of_data
Esempio n. 3
0
def val(net, test_dataset, criterion, max_iter=2):
    print('Start val')

    for p in cnn.parameters():
        p.requires_grad = False

    net.eval()
    net.load_state_dict({k.replace('module.', ''): v for k, v in torch.load(cnn_data).items()})
    val_loader = torch.utils.data.DataLoader(
        test_dataset,
        shuffle=True,
        batch_size=batchSize,
        num_workers=int(workers),
        collate_fn=lmdb_dataset.alignCollate(keep_ratio=True))

    val_iter = iter(val_loader)

    n_correct = 0
    loss_avg = utils.averager()

    image = torch.FloatTensor(batchSize, 1, imgH, imgW)
    max_iter = min(max_iter, len(val_loader))
    for i in range(max_iter):
        data = val_iter.next()
        # i += 1
        cpu_images, cpu_texts = data
        # 输入的图片数
        batch_size = cpu_images.size(0)
        # print('cpu images', cpu_images, 'shape', cpu_images.size())
        utils.loadData(image, cpu_images)
        cpu_texts = [clean_txt(tx.encode('utf-8').decode('utf-8')) for tx in cpu_texts]
        t, l = converter.encode(cpu_texts)
        # 重新匹配尺寸
        utils.loadData(text, t)  # 文字索引
        utils.loadData(length, l)  # 文字

        image = cpu_images * 255
        image = image.cuda()
        preds = cnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # 返回最大值和索引
        _, preds = preds.max(2)
        # 返回最大值的索引
        # print('max preds', preds)
        # preds = preds.squeeze(1)
        # 将tensor内存变为连续
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)

        print('preds', sim_preds, 'target', cpu_texts)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred.strip() == target.strip():
                n_correct += 1

    accuracy = n_correct / float(max_iter * batchSize)
    testLoss = loss_avg.val()
    # print('Test loss: %f, accuray: %f' % (testLoss, accuracy))
    return testLoss, accuracy
Esempio n. 4
0
imgH = 32  # should be 32
nclass = len(alphabet) + 1
nhiddenstate = 256

model = crnn.CRNN(imgH, 1, nclass, nhiddenstate)
if torch.cuda.is_available():
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
model.load_state_dict(torch.load(model_path))

converter = utils.strLabelConverter(alphabet)

transformer = dataset.resizeNormalize((200, 32))
image = Image.open(img_path).convert('L')
image = transformer(image)
if torch.cuda.is_available():
    image = image.cuda()
image = image.view(1, *image.size())
image = Variable(image)

model.eval()
preds = model(image)

_, preds = preds.max(2)
preds = preds.transpose(1, 0).contiguous().view(-1)

preds_size = Variable(torch.IntTensor([preds.size(0)]))
raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
print('%-30s => %-30s' % (raw_pred, sim_pred))
Esempio n. 5
0
 def __init__(self, enabled=False):
     super().__init__()
     self.register_buffer('enabled', torch.IntTensor([0]))
     if enabled:
         self.is_enabled = True
def active_search(ptr_net,
                  point,
                  road=None,
                  iter_time=300,
                  batch_size=300,
                  lr_p=0.01,
                  beta1=0.9,
                  alpha=0.01,
                  alpha_decay=0.9,
                  plot_comp=True,
                  plot_mean=False,
                  print_searching_log=True,
                  log_file_name='search_log.csv',
                  save_net=True,
                  save_name='search_'):
    '''
    searching for shortest road for a particular points distribution
    point:(city,coor),tensor
    road:(city),numpy
    '''
    ptr_op = optim.Adam(ptr_net.parameters(), lr=lr_p, betas=(beta1, 0.999))
    point_copy = torch.unsqueeze(point, 0).repeat(batch_size, 1, 1)
    if road is None:
        road = ptr_net.get_road(point_copy)[0]
    city = road.shape[0]
    road = torch.IntTensor(road)
    road_copy = torch.unsqueeze(road, 0).repeat(batch_size, 1)
    road_best = road_copy[0]
    #point_copy:(batch,city,coor),tensor
    #road_copy:(batch,city),tensor
    #road_best:(city)
    length_best = get_length_sum(point_copy, road_copy)[0]
    length_init = get_length_sum(point_copy, road_copy)[0]
    #length_best:number
    baseline = length_best
    mean = []
    if print_searching_log:
        log_file = open(log_file_name, 'a')
        log_file.write('iter,ptr_loss,ptr_grad,best,mean_length\n')
        log_file.close()
    #length_best:number
    for i in range(iter_time):
        point_input = city_shuffle(point_copy)
        road_output = ptr_net.get_road(point_input)
        length_all = get_length_sum(point_input, road_output)
        #length_all:(batch)
        j = torch.argmin(length_all)
        if length_all[j] < length_best:
            length_best = length_all[j]
            road_shuffle = road_output[j, :]
            point_shuffle = point_input[j]
            #print(get_length_sum_single(point_shuffle,road_shuffle))
            road_best = adjust_road(road_shuffle, point_shuffle, point)
        adv = baseline - length_all
        ptr_loss = torch.dot(ptr_net(point_input, road_output), adv)
        ptr_net.zero_grad()
        ptr_loss.backward(retain_graph=True)
        ptr_grad = torch.nn.utils.clip_grad_norm_(ptr_net.parameters(), 1)
        ptr_op.step()
        if i % 10 == 0:
            print(i)
            if save_net:
                torch.save(ptr_net, save_name + 'ptr.pkl')
        mean.append(float((torch.mean(length_all))))
        if print_searching_log:
            log_file = open(log_file_name, 'a')
            log_file.write(
                str(i) + ',' + str(float(ptr_loss)) + ',' +
                str(float(ptr_grad)) + ',' + str(float(length_all[j])) + ',' +
                str(float((torch.mean(length_all)))) + '\n')
            log_file.close()
        baseline = baseline * alpha_decay + (
            1 - alpha_decay) * torch.mean(length_all)
    fig = plt.figure()
    if plot_comp == True:
        point = point.numpy()
        fig1, ax = plt.subplots(1, 2)
        ax_init = ax[0]
        ax_init.set_title('init:' + str(round(float(length_init), 4)),
                          fontsize=14,
                          fontweight='bold')
        for i in range(city - 1):
            ax_init.plot(point[[road[i], road[i + 1]], 0],
                         point[[road[i], road[i + 1]], 1],
                         color='b')
        ax_init.plot(point[[road[city - 1], road[0]], 0],
                     point[[road[city - 1], road[0]], 1],
                     color='b')
        road_best_copy = torch.unsqueeze(torch.IntTensor(road_best),
                                         0).repeat(batch_size, 1)
        ax_after = ax[1]
        ax_after.set_title(
            'after:' + str(round(float(get_length_sum(point, road_best)), 4)),
            fontsize=14,
            fontweight='bold')
        for i in range(city - 1):
            ax_after.plot(point[[road_best[i], road_best[i + 1]], 0],
                          point[[road_best[i], road_best[i + 1]], 1],
                          color='b')
        ax_after.plot(point[[road_best[city - 1], road_best[0]], 0],
                      point[[road_best[city - 1], road_best[0]], 1],
                      color='b')
    elif plot_mean == True:
        plt.plot(mean)
    fig.show()
    return {
        'ptr_net': ptr_net,
        'mean': mean,
        'road_best': road_best,
        'length_best': length_best
    }
Esempio n. 7
0
    def response(self, input_message):
        '''
        The agent moves a step forward, upon receiving a message from the user.
        '''
        assert input_message.sender == cfg.USER
        assert input_message.receiver == cfg.AGENT

        #_______ update the agent self_______
        if input_message.message_type == cfg.INFORM_FACET:
            self.update_upon_feature_inform(input_message)
        if input_message.message_type == cfg.REJECT_REC:
            self.rejected_item_list_ += input_message.data[
                'rejected_item_list']
            self.rejected_time += 1
            if self.mini == 1:
                if self.alwaysupdate == 1:
                    for i in range(cfg.update_count):
                        self.mini_update_FM()
                    self.mini_update_already = True
                    self.recent_candidate_list = list(
                        set(self.recent_candidate_list) -
                        set(self.rejected_item_list_))
                    self.recent_candidate_list = list(
                        set(self.recent_candidate_list) -
                        set([self.busi_id])) + [self.busi_id]
                    self.recent_candidate_list_ranked, self.previous_dict = rank_items(
                        self.known_feature, self.user_id, self.busi_id,
                        self.skip_big_feature, self.FM_model,
                        self.recent_candidate_list, self.write_fp, 1,
                        self.rejected_item_list_, self.previous_dict)

        #_______ Adding into history _______
        if input_message.message_type == cfg.INFORM_FACET:
            if self.turn_count > 0:  # means first doesn't# count
                if input_message.data['value'] is None:
                    self.history_list.append(0)  # ask attribute, fail
                else:
                    self.history_list.append(1)  # ask attribute, successful

        if input_message.message_type == cfg.REJECT_REC:
            self.history_list.append(
                -1)  # try recommendation, user doesn't want.
            self.recent_candidate_list = list(
                set(self.recent_candidate_list) -
                set(self.rejected_item_list_))  # don't consider

        if cfg.play_by != 'AOO' and cfg.play_by != 'AOO_valid':
            # Add control point here
            if cfg.mod == 'ear':
                state_vector = self.vectorize()
            else:
                state_vector = self.vectorize_crm()

        action = None
        SoftMax = nn.Softmax(dim=-1)
        if cfg.play_by == 'AOO' or cfg.play_by == 'AOO_valid':
            new_message = self.prepare_next_question()  #

        if cfg.play_by == 'AO':  # means Ask, the recommendation is made by a probability
            new_message = self.prepare_next_question()
            x = len(self.recent_candidate_list)
            p = 10.0 / x
            a = random.uniform(0, 1)
            if a < p:
                new_message = self.prepare_rec_message()

        if cfg.play_by == 'RO':
            # means RecOnly, only make recommendation at each turn.
            # For Abs-Greedy Evaluation
            new_message = self.prepare_rec_message()

        if cfg.play_by == 'policy':
            # means using Policy Network to determine action
            s = torch.from_numpy(state_vector).float()
            s = Variable(s, requires_grad=True)
            self.PN_model.eval()
            pred = self.PN_model(s)
            prob = SoftMax(pred)
            c = Categorical(prob)

            # different way to choose action
            if cfg.eval == 1:
                # for evaluation of Action stage
                pred_data = pred.data.tolist()
                sorted_index = sorted(range(len(pred_data)),
                                      key=lambda k: pred_data[k],
                                      reverse=True)

                # The following line are for avoid asking same question
                # It is a fair evaluation, because all models have such operation.
                unasked_max = None
                for item in sorted_index:
                    if item < self.big_feature_length:
                        if cfg.FACET_POOL[item] not in self.asked_feature:
                            unasked_max = item
                            break
                    else:
                        unasked_max = self.big_feature_length
                        break
                action = Variable(torch.IntTensor(
                    [unasked_max]))  # make it compatible with torch
                print('action is: {}'.format(action))
            else:
                # for training of Action stage
                i = 0
                action_ = self.big_feature_length
                while (i < 10000):
                    action_ = c.sample()
                    i += 1
                    if action_ <= self.big_feature_length:
                        if action_ == self.big_feature_length:
                            break
                        elif cfg.FACET_POOL[action_] not in self.asked_feature:
                            break
                action = action_
                print('action is: {}'.format(action))

            log_prob = c.log_prob(action)
            if self.turn_count != 0:
                self.log_prob_list = torch.cat(
                    [self.log_prob_list,
                     log_prob.reshape(1)])
            else:
                self.log_prob_list = log_prob.reshape(1)

            # translate into message
            if action < len(cfg.FACET_POOL):
                data = dict()
                data['facet'] = cfg.FACET_POOL[action]
                new_message = message(cfg.AGENT, cfg.USER, cfg.ASK_FACET, data)
            else:
                new_message = self.prepare_rec_message()

            self.action_tracker.append(action.data.numpy().tolist())
            self.candidate_length_tracker.append(
                len(self.recent_candidate_list))
        else:
            new_message = self.prepare_rec_message()

        # following are for writing to numpy array
        action = None
        if new_message.message_type == cfg.ASK_FACET:
            action = cfg.FACET_POOL.index(new_message.data['facet'])

        if new_message.message_type == cfg.MAKE_REC:
            action = len(cfg.FACET_POOL)

        if cfg.purpose == 'pretrain':
            self.numpy_list.append((action, state_vector))
        # end following

        with open(self.write_fp, 'a') as f:
            f.write('Turn count: {}, candidate length: {}\n'.format(
                self.turn_count, len(self.recent_candidate_list)))
        return new_message
Esempio n. 8
0
def post_processing(logits, image_size, gt_classes, anchors, conf_threshold,
                    nms_threshold):
    num_anchors = len(anchors)
    anchors = torch.Tensor(anchors)
    if isinstance(logits, Variable):
        logits = logits.data

    if logits.dim() == 3:
        logits.unsqueeze_(0)

    batch = logits.size(0)
    h = logits.size(2)
    w = logits.size(3)

    # Compute xc,yc, w,h, box_score on Tensor
    lin_x = torch.linspace(0, w - 1, w).repeat(h, 1).view(h * w)
    lin_y = torch.linspace(0, h - 1, h).repeat(w,
                                               1).t().contiguous().view(h * w)
    anchor_w = anchors[:, 0].contiguous().view(1, num_anchors, 1)
    anchor_h = anchors[:, 1].contiguous().view(1, num_anchors, 1)
    if torch.cuda.is_available():
        lin_x = lin_x.cuda()
        lin_y = lin_y.cuda()
        anchor_w = anchor_w.cuda()
        anchor_h = anchor_h.cuda()

    logits = logits.view(batch, num_anchors, -1, h * w)
    logits[:, :, 0, :].sigmoid_().add_(lin_x).div_(w)
    logits[:, :, 1, :].sigmoid_().add_(lin_y).div_(h)
    logits[:, :, 2, :].exp_().mul_(anchor_w).div_(w)
    logits[:, :, 3, :].exp_().mul_(anchor_h).div_(h)
    logits[:, :, 4, :].sigmoid_()

    with torch.no_grad():
        cls_scores = torch.nn.functional.softmax(logits[:, :, 5:, :], 2)
    cls_max, cls_max_idx = torch.max(cls_scores, 2)
    cls_max_idx = cls_max_idx.float()
    cls_max.mul_(logits[:, :, 4, :])

    score_thresh = cls_max > conf_threshold
    score_thresh_flat = score_thresh.view(-1)

    if score_thresh.sum() == 0:
        predicted_boxes = []
        for i in range(batch):
            predicted_boxes.append(torch.Tensor([]))
    else:
        coords = logits.transpose(2, 3)[..., 0:4]
        coords = coords[score_thresh[..., None].expand_as(coords)].view(-1, 4)
        scores = cls_max[score_thresh]
        idx = cls_max_idx[score_thresh]
        detections = torch.cat([coords, scores[:, None], idx[:, None]], dim=1)

        max_det_per_batch = num_anchors * h * w
        slices = [
            slice(max_det_per_batch * i, max_det_per_batch * (i + 1))
            for i in range(batch)
        ]
        det_per_batch = torch.IntTensor(
            [score_thresh_flat[s].int().sum() for s in slices])
        split_idx = torch.cumsum(det_per_batch, dim=0)

        # Group detections per image of batch
        predicted_boxes = []
        start = 0
        for end in split_idx:
            predicted_boxes.append(detections[start:end])
            start = end

    selected_boxes = []
    for boxes in predicted_boxes:
        if boxes.numel() == 0:
            return boxes

        a = boxes[:, :2]
        b = boxes[:, 2:4]
        bboxes = torch.cat([a - b / 2, a + b / 2], 1)
        scores = boxes[:, 4]

        # Sort coordinates by descending score
        scores, order = scores.sort(0, descending=True)
        x1, y1, x2, y2 = bboxes[order].split(1, 1)

        # Compute dx and dy between each pair of boxes (these mat contain every pair twice...)
        dx = (x2.min(x2.t()) - x1.max(x1.t())).clamp(min=0)
        dy = (y2.min(y2.t()) - y1.max(y1.t())).clamp(min=0)

        # Compute iou
        intersections = dx * dy
        areas = (x2 - x1) * (y2 - y1)
        unions = (areas + areas.t()) - intersections
        ious = intersections / unions

        # Filter based on iou (and class)
        conflicting = (ious > nms_threshold).triu(1)

        keep = conflicting.sum(0).byte()
        keep = keep.cpu()
        conflicting = conflicting.cpu()

        keep_len = len(keep) - 1
        for i in range(1, keep_len):
            if keep[i] > 0:
                keep -= conflicting[i]
        if torch.cuda.is_available():
            keep = keep.cuda()

        keep = (keep == 0)
        selected_boxes.append(boxes[order][keep[:,
                                                None].expand_as(boxes)].view(
                                                    -1, 6).contiguous())

    final_boxes = []
    for boxes in selected_boxes:
        if boxes.dim() == 0:
            final_boxes.append([])
        else:
            boxes[:, 0:3:2] *= image_size
            boxes[:, 0] -= boxes[:, 2] / 2
            boxes[:, 1:4:2] *= image_size
            boxes[:, 1] -= boxes[:, 3] / 2

            final_boxes.append([[
                box[0].item(), box[1].item(), box[2].item(), box[3].item(),
                box[4].item(), gt_classes[int(box[5].item())]
            ] for box in boxes])
    return final_boxes
Esempio n. 9
0
def evaluate(query_features, query_labels, query_cams, gallery_features,
             gallery_labels, gallery_cams):
    """Evaluate the CMC and mAP

    Arguments:
        query_features {np.ndarray of size NxC} -- Features of probe images
        query_labels {np.ndarray of query size N} -- Labels of probe images
        query_cams {np.ndarray of query size N} -- Cameras of probe images
        gallery_features {np.ndarray of size N'xC} -- Features of gallery images
        gallery_labels {np.ndarray of gallery size N'} -- Lables of gallery images
        gallery_cams {np.ndarray of gallery size N'} -- Cameras of gallery images

    Returns:
        (torch.IntTensor, float) -- CMC list, mAP
    """

    CMC = torch.IntTensor(len(gallery_labels)).zero_()
    AP = 0

    for i in range(len(query_labels)):
        query_feature = query_features[i]
        query_label = query_labels[i]
        query_cam = query_cams[i]

        # Prediction score
        score = np.dot(gallery_features, query_feature)

        match_query_index = np.argwhere(gallery_labels == query_label)
        same_camera_index = np.argwhere(gallery_cams == query_cam)

        # Positive index is the matched indexs at different camera i.e. the desired result
        positive_index = np.setdiff1d(match_query_index,
                                      same_camera_index,
                                      assume_unique=True)

        # Junk index is the indexs at the same camera or the unlabeled image
        junk_index = np.append(np.argwhere(gallery_labels == -1),
                               np.intersect1d(match_query_index,
                                              same_camera_index))  # .flatten()

        index = np.arange(len(gallery_labels))
        # Remove all the junk indexs
        sufficient_index = np.setdiff1d(index, junk_index)

        # compute AP
        y_true = np.in1d(sufficient_index, positive_index)
        y_score = score[sufficient_index]
        AP += average_precision_score(y_true, y_score)

        # Compute CMC
        # Sort the sufficient index by their scores, from large to small
        lexsort_index = np.argsort(y_score)
        sorted_y_true = y_true[lexsort_index[::-1]]
        match_index = np.argwhere(sorted_y_true == True)

        if match_index.size > 0:
            first_match_index = match_index.flatten()[0]
            CMC[first_match_index:] += 1

    CMC = CMC.float()
    CMC = CMC / len(query_labels) * 100  # average CMC
    mAP = AP / len(query_labels) * 100

    return CMC, mAP
Esempio n. 10
0
def train(opt):
    """ dataset preparation """
    if not opt.data_filtering_off:
        print(
            'Filtering the images containing characters which are not in opt.character'
        )
        print(
            'Filtering the images whose label is longer than opt.batch_max_length'
        )
        # see https://github.com/clovaai/deep-text-recognition-benchmark/blob/6593928855fb7abb999a99f428b3e4477d4ae356/dataset.py#L130

    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    log = open(f'./saved_models/{opt.exp_name}/log_dataset.txt', 'a')
    AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                      imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset, valid_dataset_log = hierarchical_dataset(
        root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=opt.batch_size,
        shuffle=
        True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid,
        pin_memory=True)
    log.write(valid_dataset_log)
    print('-' * 80)
    log.write('-' * 80 + '\n')
    log.close()
    """ model configuration """
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            converter = CTCLabelConverterForBaiduWarpctc(opt.character)
        else:
            converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.saved_model != '':
        print(f'loading pretrained model from {opt.saved_model}')
        if opt.FT:
            model.load_state_dict(torch.load(opt.saved_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_model))
    print("Model:")
    print(model)
    """ setup loss """
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            # need to install warpctc. see our guideline.
            from warpctc_pytorch import CTCLoss
            criterion = CTCLoss()
        else:
            criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(
            device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # filter that only require gradient decent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)
    """ final options """
    # print(opt)
    with open(f'./saved_models/{opt.exp_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)
    """ start training """
    start_iter = 0
    if opt.saved_model != '':
        try:
            start_iter = int(opt.saved_model.split('_')[-1].split('.')[0])
            print(f'continue to train, start_iter: {start_iter}')
        except:
            pass

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = -1
    iteration = start_iter

    while (True):
        # train part
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels,
                                        batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            if opt.baiduCTC:
                preds = preds.permute(1, 0, 2)  # to use CTCLoss format
                cost = criterion(preds, text, preds_size, length) / batch_size
            else:
                preds = preds.log_softmax(2).permute(1, 0, 2)
                cost = criterion(preds, text, preds_size, length)

        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if (
                iteration + 1
        ) % opt.valInterval == 0 or iteration == 0:  # To see training progress, we also conduct validation when 'iteration == 0'
            elapsed_time = time.time() - start_time
            # for log
            with open(f'./saved_models/{opt.exp_name}/log_train.txt',
                      'a') as log:
                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, confidence_score, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                # training loss and validation loss
                loss_log = f'[{iteration+1}/{opt.num_iter}] Train loss: {loss_avg.val():0.5f}, Valid loss: {valid_loss:0.5f}, Elapsed_time: {elapsed_time:0.5f}'
                loss_avg.reset()

                current_model_log = f'{"Current_accuracy":17s}: {current_accuracy:0.3f}, {"Current_norm_ED":17s}: {current_norm_ED:0.2f}'

                # keep best accuracy model (on valid dataset)
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(
                        model.state_dict(),
                        f'./saved_models/{opt.exp_name}/best_accuracy.pth')
                if current_norm_ED > best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(
                        model.state_dict(),
                        f'./saved_models/{opt.exp_name}/best_norm_ED.pth')
                best_model_log = f'{"Best_accuracy":17s}: {best_accuracy:0.3f}, {"Best_norm_ED":17s}: {best_norm_ED:0.2f}'

                loss_model_log = f'{loss_log}\n{current_model_log}\n{best_model_log}'
                print(loss_model_log)
                log.write(loss_model_log + '\n')

                # show some predicted results
                dashed_line = '-' * 80
                head = f'{"Ground Truth":25s} | {"Prediction":25s} | Confidence Score & T/F'
                predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'
                for gt, pred, confidence in zip(labels[:5], preds[:5],
                                                confidence_score[:5]):
                    if 'Attn' in opt.Prediction:
                        gt = gt[:gt.find('[s]')]
                        pred = pred[:pred.find('[s]')]

                    predicted_result_log += f'{gt:25s} | {pred:25s} | {confidence:0.4f}\t{str(pred == gt)}\n'
                predicted_result_log += f'{dashed_line}'
                print(predicted_result_log)
                log.write(predicted_result_log + '\n')

        # save model per 1e+5 iter.
        if (iteration + 1) % 1e+5 == 0:
            torch.save(
                model.state_dict(),
                f'./saved_models/{opt.exp_name}/iter_{iteration+1}.pth')

        if (iteration + 1) == opt.num_iter:
            print('end the training')
            sys.exit()
        iteration += 1
Esempio n. 11
0
 def get_text(self, audiopath_and_text):
     text = audiopath_and_text[1]
     text_norm = torch.IntTensor(text_to_sequence(text, self.text_cleaners))
     return text_norm
Esempio n. 12
0
def train(opt):
    """ dataset preparation """
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                      imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=opt.batch_size,
        shuffle=
        True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid,
        pin_memory=True)
    print('-' * 80)
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.continue_model != '':
        print(f'loading pretrained model from {opt.continue_model}')
        model.load_state_dict(torch.load(opt.continue_model))
    print("Model:")
    print(model)
    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(
            device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # filter that only require gradient decent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)
    """ final options """
    # print(opt)
    with open(f'./saved_models/{opt.experiment_name}/opt.txt',
              'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)
    """ start training """
    start_iter = 0
    if opt.continue_model != '':
        start_iter = int(opt.continue_model.split('_')[-1].split('.')[0])
        print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6
    i = start_iter

    while (True):
        # train part
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels,
                                        batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text).log_softmax(2)
            preds_size = torch.IntTensor([preds.size(1)] *
                                         batch_size).to(device)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format

            # To avoid ctc_loss issue, disabled cudnn for the computation of the ctc_loss
            # https://github.com/jpuigcerver/PyLaia/issues/16
            torch.backends.cudnn.enabled = False
            cost = criterion(preds, text, preds_size, length)
            torch.backends.cudnn.enabled = True

        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            print(
                f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}'
            )
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt',
                      'a') as log:
                log.write(
                    f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n'
                )
                loss_avg.reset()

                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                for pred, gt in zip(preds[:5], labels[:5]):
                    if 'Attn' in opt.Prediction:
                        pred = pred[:pred.find('[s]')]
                        gt = gt[:gt.find('[s]')]
                    print(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}')
                    log.write(
                        f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                print(valid_log)
                log.write(valid_log + '\n')

                # keep best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(
                        model.state_dict(),
                        f'./saved_models/{opt.experiment_name}/best_accuracy.pth'
                    )
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(
                        model.state_dict(),
                        f'./saved_models/{opt.experiment_name}/best_norm_ED.pth'
                    )
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                print(best_model_log)
                log.write(best_model_log + '\n')

        # save model per 1e+5 iter.
        if (i + 1) % 1e+5 == 0:
            torch.save(model.state_dict(),
                       f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        if i == opt.num_iter:
            print('end the training')
            sys.exit()
        i += 1
Esempio n. 13
0
#创建tensor(关于numoy)

a = np.array([2, 3.3])
torch.from_numpy(a)

a = np.ones([2, 3])
torch.from_numpy(a)

#这里tensor函数(小写)接收的是具体的数据;Tensor/Float Tensor()接收的是shape
torch.tensor([2, 3.2])
torch.FloatTensor(2, 3)

#申请一片控件(未初始化的数据填充;但数据奇怪
torch.empty(1)
torch.FloatTensor(2, 2, 2)  #这里给的同样是shape,如果需要具体数据以list形式写入
torch.IntTensor(3, 3, 3, 3)

#Tensor()初始化类型是float,但使用中常用double;
torch.set_default_tensor_type(torch.DoubleTensor)
torch.tensor([1.2, 3]).type()

#随机初始化
#rand是[0,1]的随机分布;randn是(0,1)的正态分布;normal的API操作更加复杂
a = torch.rand(3, 3)  #三行三列
torch.rand_like(a)
torch.normal(mean=torch.full([10], 0), std=torch.arange(1, 0, -0.1))
#使用arrange可以生成[start,end)的一个等差数列,默认1递增

#linsapce/logspace以steps参数值进行[start,end]区间的等长或者log长划分
torch.linspace(0, 10, steps=4)
torch.logspace(0, 10, steps=4)
Esempio n. 14
0
def dataGenerator(file_name='../data/trainYelp.txt',
                  train_split=0.8,
                  max_length=1014,
                  indexing_choice=0,
                  nb_merge=50):
    if (indexing_choice == 0):
        index = indexing
    if (indexing_choice == 1):
        index = bigIndexing
    if (indexing_choice == 2):
        index = altIndexing
    list_string = []
    with open(file_name) as infile:
        while True:
            text = infile.readline()
            if len(text) == 0:
                break
            info = json.loads(text)
            data = info["review"]
            list_string.append(data)
    list_subword = create_vocab(list_string, nb_merge)
    print("end training BPE")
    list_subword_witout_end = create_list_sobword_without_end(list_subword)
    start_index = max(index.values())
    for i, sub in enumerate(list_subword_witout_end):
        index[sub] = start_index + i + 1
    print('index', index)
    dataset = []
    print("start encoding training")
    with open(file_name) as infile:
        while True:
            text = infile.readline()
            if len(text) == 0:
                break
            info = json.loads(text)
            rating = info["rating"]
            data = info["review"]
            review = torch.zeros(max_length).long()
            tokenizer = RegexpTokenizer(r'\w+')
            list_words = tokenizer.tokenize(data)
            list_word_subwords = [
                transform_BPE_word(i, list_subword_witout_end)
                for i in list_words
            ]
            list_subwords = []
            for word in list_word_subwords:
                list_subwords += word
                list_subwords += ' '
            for i in range(min(max_length, len(list_subwords))):
                unit = list_subwords[i].lower()
                if unit in index:
                    review[i] = index[unit]
                else:
                    review[i] = index['UNK']
            dataset.append({
                'review': review,
                'rating': torch.IntTensor([rating])
            })
    print("end encoding training")
    #random split 0.8 / 0.2
    dataset_train, dataset_val = train_test_split(dataset,
                                                  test_size=1 - train_split)
    alphabet_size = max(index.values()) + 1
    return dataset_train, dataset_val, list_subword_witout_end, alphabet_size
Esempio n. 15
0
def test_private_compare(workers):
    """
    Test private compare which returns: β′ = β ⊕ (x > r).
    """
    alice, bob, james = workers["alice"], workers["bob"], workers["james"]
    L = 2**64
    x_bit_sh = (decompose(torch.LongTensor([13]),
                          L).share(alice,
                                   bob,
                                   crypto_provider=james,
                                   field=67,
                                   dtype="custom").child)
    r = torch.LongTensor([12]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p

    # Big values
    x_bit_sh = (decompose(torch.LongTensor([2**60]),
                          L).share(alice,
                                   bob,
                                   crypto_provider=james,
                                   field=67,
                                   dtype="custom").child)
    r = torch.LongTensor([2**61]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    # Multidimensional tensors
    x_bit_sh = (decompose(torch.LongTensor([[13, 44], [1, 28]]),
                          L).share(alice,
                                   bob,
                                   crypto_provider=james,
                                   field=67,
                                   dtype="custom").child)
    r = torch.LongTensor([[12, 44], [12, 33]]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert (beta_p == torch.tensor([[0, 1], [1, 1]])).all()

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert (beta_p == torch.tensor([[1, 0], [0, 0]])).all()

    # Negative values
    x_val = -105
    r_val = -52 % 2**63  # The protocol works only for values in Zq
    x_bit_sh = (decompose(torch.LongTensor([x_val]),
                          L).share(alice,
                                   bob,
                                   crypto_provider=james,
                                   field=67,
                                   dtype="custom").child)
    r = torch.LongTensor([r_val]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    # With dtype int
    L = 2**32

    x_bit_sh = (decompose(torch.IntTensor([13]),
                          L).share(alice,
                                   bob,
                                   crypto_provider=james,
                                   field=67,
                                   dtype="custom").child)
    r = torch.IntTensor([12]).send(alice, bob).child

    beta = torch.IntTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    beta = torch.IntTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p
 def get_text(self, text):
     text_encoded = torch.IntTensor(self.tp.encode_text(text))
     return text_encoded
Esempio n. 17
0
def test_forward(args):
    args = make_args(**args)

    batch_size = 4
    xmaxs = [40, 45] if args['chunk_size_left'] == -1 else [400, 455]
    device = "cpu"

    module = importlib.import_module('neural_sp.models.seq2seq.encoders.rnn')
    enc = module.RNNEncoder(**args)
    enc = enc.to(device)

    for xmax in xmaxs:
        xs = np.random.randn(batch_size, xmax,
                             args['input_dim']).astype(np.float32)
        xlens = torch.IntTensor(
            [len(x) - i * enc.subsampling_factor for i, x in enumerate(xs)])
        xs = pad_list([np2tensor(x, device).float() for x in xs], 0.)
        enc_out_dict = enc(xs, xlens, task='all')

        assert enc_out_dict['ys']['xs'].size(0) == batch_size
        assert enc_out_dict['ys']['xs'].size(
            1) == enc_out_dict['ys']['xlens'].max()
        for b in range(batch_size):
            if 'conv' in args['rnn_type'] or args['subsample_type'] in [
                    'max_pool', '1dconv'
            ]:
                assert enc_out_dict['ys']['xlens'][b].item() == math.ceil(
                    xlens[b].item() / enc.subsampling_factor)
            else:
                assert enc_out_dict['ys']['xlens'][b].item() == math.floor(
                    xlens[b].item() / enc.subsampling_factor)
        if args['n_layers_sub1'] > 0:
            # all outputs
            assert enc_out_dict['ys_sub1']['xs'].size(0) == batch_size
            assert enc_out_dict['ys_sub1']['xs'].size(
                1) == enc_out_dict['ys_sub1']['xlens'].max()
            for b in range(batch_size):
                if 'conv' in args['rnn_type'] or args['subsample_type'] in [
                        'max_pool', '1dconv'
                ]:
                    assert enc_out_dict['ys_sub1']['xlens'][b].item(
                    ) == math.ceil(xlens[b].item() / enc.subsampling_factor)
                else:
                    assert enc_out_dict['ys_sub1']['xlens'][b].item(
                    ) == math.floor(xlens[b].item() / enc.subsampling_factor)
            # single output
            enc_out_dict_sub1 = enc(xs, xlens, task='ys_sub1')
            assert enc_out_dict_sub1['ys_sub1']['xs'].size(0) == batch_size
            assert enc_out_dict_sub1['ys_sub1']['xs'].size(
                1) == enc_out_dict['ys_sub1']['xlens'].max()

        if args['n_layers_sub2'] > 0:
            # all outputs
            assert enc_out_dict['ys_sub2']['xs'].size(0) == batch_size
            assert enc_out_dict['ys_sub2']['xs'].size(
                1) == enc_out_dict['ys_sub2']['xlens'].max()
            for b in range(batch_size):
                if 'conv' in args['rnn_type'] or args['subsample_type'] in [
                        'max_pool', '1dconv'
                ]:
                    assert enc_out_dict['ys_sub2']['xlens'][b].item(
                    ) == math.ceil(xlens[b].item() / enc.subsampling_factor)
                else:
                    assert enc_out_dict['ys_sub2']['xlens'][b].item(
                    ) == math.floor(xlens[b].item() / enc.subsampling_factor)
            # single output
            enc_out_dict_sub12 = enc(xs, xlens, task='ys_sub2')
            assert enc_out_dict_sub12['ys_sub2']['xs'].size(0) == batch_size
            assert enc_out_dict_sub12['ys_sub2']['xs'].size(
                1) == enc_out_dict_sub12['ys_sub2']['xlens'].max()
Esempio n. 18
0
    def predict(self, image_list):
        if len(image_list) <= 0:
            return [""]
        demo_data = RawDataset(image_list, opt=self.opt)  # use RawDataset
        demo_loader = torch.utils.data.DataLoader(
            demo_data,
            batch_size=self.opt.batch_size,
            shuffle=False,
            num_workers=int(self.opt.workers),
            collate_fn=self.AlignCollate_demo,
            pin_memory=True)

        # predict
        ret = []
        self.model.eval()
        with torch.no_grad():
            for image_tensors, image_path_list in demo_loader:
                batch_size = image_tensors.size(0)
                image = image_tensors.to(device)
                # For max length prediction
                length_for_pred = torch.IntTensor([self.opt.batch_max_length] *
                                                  batch_size).to(device)
                text_for_pred = torch.LongTensor(batch_size,
                                                 self.opt.batch_max_length +
                                                 1).fill_(0).to(device)

                if 'CTC' in self.opt.Prediction:
                    preds = self.model(image, text_for_pred)

                    # Select max probabilty (greedy decoding) then decode index to character
                    preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                    _, preds_index = preds.max(2)
                    preds_index = preds_index.view(-1)
                    preds_str = self.converter.decode(preds_index.data,
                                                      preds_size.data)

                else:
                    preds = self.model(image, text_for_pred, is_train=False)

                    # select max probabilty (greedy decoding) then decode index to character
                    _, preds_index = preds.max(2)
                    preds_str = self.converter.decode(preds_index,
                                                      length_for_pred)

                # log = open(f'./log_demo_result.txt', 'a')
                # dashed_line = '-' * 80
                # head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

                # print(f'{dashed_line}\n{head}\n{dashed_line}')
                # log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for img_name, pred, pred_max_prob in zip(
                        image_path_list, preds_str, preds_max_prob):
                    if 'Attn' in self.opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        pred = pred[:
                                    pred_EOS]  # prune after "end of sentence" token ([s])
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= multiply of pred_max_prob)
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    if confidence_score <= 0.4:
                        pred = "None"
                    ret.append(pred)
                    # ret.append(preds_str)
                    print(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
        return ret
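The confidence score used above is simply the product of the per-character max probabilities, obtained via cumprod; a tiny standalone illustration follows (the probability values are made up, and the 0.4 threshold mirrors the snippet):

import torch

# Product of per-character max probabilities via cumprod (made-up values).
pred_max_prob = torch.tensor([0.9, 0.8, 0.95])
confidence_score = pred_max_prob.cumprod(dim=0)[-1]  # 0.9 * 0.8 * 0.95 = 0.684
print(f'{confidence_score:0.4f}')  # 0.6840
# With the threshold from the snippet above, 0.684 > 0.4, so the prediction is kept.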
Example No. 19
0
    def preprocess(self, image):
        image = load_image(image)
        target_size = torch.IntTensor([[image.height, image.width]])
        inputs = self.feature_extractor(images=[image], return_tensors="pt")
        inputs["target_size"] = target_size
        return inputs
Example No. 20
0
import math
import random

import torch
from tqdm import tqdm

import kay
import pykay

pic_res = 256
objs = []
bunny = pykay.OBJ("./models/01/", "bunny.obj")
objs.append(bunny)

rt = kay.Rtcore()
vertex = []
index = []
for i in range(len(objs)):
    vertex.append(torch.Tensor(objs[i].vertices))
    index.append(torch.IntTensor(objs[i].faces))
    print(objs[i].vcount, objs[i].fcount)
    rt.addGeo(kay.float_ptr(vertex[i].data_ptr()),
              kay.unsigned_int_ptr(index[i].data_ptr()), objs[i].vcount,
              objs[i].fcount)
rt.RTsetup()
# camera info init
pos = torch.Tensor([0.0, 0.0, 0.2])
look = torch.Tensor([0, 0, 0])
up = torch.Tensor([0, 1, 0])
c = pykay.Camera(pos, look, up, 1.0, 1.0)
center = c.pos + c.f_dist * c.look_dir
temp = c.f_dist * c._fov  # half height
right = torch.cross(c.look_dir, c.up)
left_up = center + temp * c.up - temp * right
Example No. 21
0
def collate(
    samples,
    pad_idx,
    chunk_width,
    chunk_left_context,
    chunk_right_context,
    label_delay,
    seed,
    epoch,
    pad_to_length=None,
    pad_to_multiple=1,
    src_bucketed=False,
    random_chunking=True,
):
    if len(samples) == 0:
        return {}

    def merge(key, pad_to_length=None):
        if key == "source":
            return speech_utils.collate_frames(
                [s[key] for s in samples],
                0.0,
                pad_to_length=pad_to_length,
                pad_to_multiple=pad_to_multiple,
            )
        elif key == "target":
            return data_utils.collate_tokens(
                [s[key] for s in samples],
                pad_idx=pad_idx,
                eos_idx=None,
                left_pad=False,
                move_eos_to_beginning=False,
                pad_to_length=pad_to_length,
                pad_to_multiple=pad_to_multiple,
            )
        else:
            raise ValueError("Invalid key.")

    def chunking(src_item, tgt_item, tgt_start):
        # make a src chunk in the range [begin_src, end_src)
        begin_src = max(0, tgt_start + label_delay - chunk_left_context)
        # ok if end_src past the end of utterance
        end_src = tgt_start + label_delay + chunk_width + chunk_right_context
        # replication pad if necessary
        left_pad = max(0, chunk_left_context - tgt_start - label_delay)
        right_pad = max(0, end_src - src_item.size(0))
        src_item = src_item[begin_src:end_src]
        if left_pad > 0 or right_pad > 0:
            src_item = F.pad(
                src_item.t().unsqueeze(0),
                (left_pad, right_pad),
                mode="replicate",
            ).squeeze(0).t()

        if tgt_item is not None:
            # make a tgt chunk in the range [begin_tgt, end_tgt)
            begin_tgt = tgt_start
            end_tgt = tgt_start + chunk_width  # ok if past the end of utterance
            # replication pad if necessary
            right_pad = max(0, end_tgt - tgt_item.size(0))
            tgt_item = tgt_item[begin_tgt:end_tgt]
            if right_pad > 0:
                tgt_item = torch.cat(
                    (tgt_item, tgt_item.new_full((right_pad, ), pad_idx)), 0)
        return src_item, tgt_item

    if chunk_width is None or random_chunking:
        if chunk_width is not None:  # usually for chunk-wise train data
            # no need to sort as all chunks have exactly the same length
            for s in samples:
                with data_utils.numpy_seed(seed, epoch, s["id"]):
                    # generate a chunk by sampling the index of its first label
                    f = np.random.randint(s["source"].size(0) - chunk_width +
                                          1)
                s["source"], s["target"] = chunking(s["source"], s["target"],
                                                    f)
        elif label_delay != 0:  # shift source according to label_delay
            if label_delay > 0:
                left_pad, right_pad = 0, label_delay
            else:
                left_pad, right_pad = -label_delay, 0
            for s in samples:
                src_item = s["source"]
                src_item = F.pad(
                    src_item.t().unsqueeze(0),
                    (left_pad, right_pad),
                    mode="replicate",
                ).squeeze(0).t()
                if label_delay > 0:
                    s["source"] = src_item[label_delay:]
                else:
                    s["source"] = src_item[:label_delay]

        if pad_to_length is not None or src_bucketed:
            src_lengths = torch.IntTensor(
                [s["source"].ne(0.0).any(dim=1).int().sum() for s in samples])
        else:
            src_lengths = torch.IntTensor(
                [s["source"].size(0) for s in samples])
        id = torch.LongTensor([s["id"] for s in samples])
        utt_id = [s["utt_id"] for s in samples]
        src_frames = merge(
            "source",
            pad_to_length=pad_to_length["source"]
            if pad_to_length is not None else None,
        )

        target = None
        if samples[0].get("target", None) is not None:
            target = merge(
                "target",
                pad_to_length=pad_to_length["target"]
                if pad_to_length is not None else None,
            )
            ntokens = sum(s["target"].ne(pad_idx).int().sum().item()
                          for s in samples)
        else:
            ntokens = src_lengths.sum().item()

        text = None
        if samples[0].get("text", None) is not None:
            text = [s["text"] for s in samples]

        if chunk_width is None:  # for whole utterances (i.e., no chunking)
            # sort by descending source length
            src_lengths, sort_order = src_lengths.sort(descending=True)
            id = id.index_select(0, sort_order)
            utt_id = [utt_id[i] for i in sort_order.numpy()]
            src_frames = src_frames.index_select(0, sort_order)
            if target is not None:
                target = target.index_select(0, sort_order)
            if text is not None:
                text = [text[i] for i in sort_order.numpy()]

        batch = {
            "id": id,
            "utt_id": utt_id,
            "nsentences": len(samples),
            "ntokens": ntokens,
            "net_input": {
                "src_tokens": src_frames,
                "src_lengths": src_lengths
            },
            "target": target,
            "text": text,
        }
        return batch
    else:  # sequential chunking, usually for chunk-wise test data
        if pad_to_length is not None or src_bucketed:
            src_lengths = torch.IntTensor(
                [s["source"].ne(0.0).any(dim=1).int().sum() for s in samples])
        else:
            src_lengths = torch.IntTensor(
                [s["source"].size(0) for s in samples])
        id = torch.LongTensor([s["id"] for s in samples])
        utt_id = [s["utt_id"] for s in samples]
        ori_source = [s["source"] for s in samples]
        ori_target = [s["target"] for s in samples]
        text = None
        if samples[0].get("text", None) is not None:
            text = [s["text"] for s in samples]
        max_length = max(src.size(0) for src in ori_source)
        num_chunks = (max_length + chunk_width - 1) // chunk_width
        batches = []
        for k in range(num_chunks):
            f = k * chunk_width
            for i, s in enumerate(samples):
                if f < src_lengths[i].item():
                    s["source"], s["target"] = chunking(
                        ori_source[i], ori_target[i], f)
                else:
                    s["source"] = ori_source[i].new_zeros(
                        chunk_width + chunk_left_context + chunk_right_context,
                        ori_source[i].size(1))
                    s["target"] = (ori_target[i].new_full(
                        (chunk_width, ), pad_idx)
                                   if ori_target[i] is not None else None)
            src_frames = merge(
                "source",
                pad_to_length=pad_to_length["source"]
                if pad_to_length is not None else None,
            )
            src_chunk_lengths = torch.IntTensor(
                [s["source"].size(0) for s in samples])

            target = None
            if samples[0].get("target", None) is not None:
                target = merge(
                    "target",
                    pad_to_length=pad_to_length["target"]
                    if pad_to_length is not None else None,
                )
                ntokens = sum(s["target"].ne(pad_idx).int().sum().item()
                              for s in samples)
            else:
                ntokens = src_lengths.sum().item()

            batch = {
                "id": id,
                "utt_id": utt_id,
                "nsentences": len(samples) if k == 0 else 0,
                "ntokens": ntokens,
                "net_input": {
                    "src_tokens": src_frames,
                    "src_lengths": src_chunk_lengths
                },
                "target": target,
                "text": text,
            }
            batches.append(batch)
        return batches
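The chunk boundaries and replication padding computed in `chunking` above can be hard to follow from the comments alone; here is a minimal, self-contained sketch of that arithmetic with made-up sizes (the variable names mirror the helper, everything else is illustrative):

import torch
import torch.nn.functional as F

# Made-up sizes: a 10-frame utterance, chunk_width=4, 2 frames of context on each side.
src_item = torch.arange(10, dtype=torch.float32).unsqueeze(1)  # (T=10, C=1)
tgt_start, label_delay = 0, 0
chunk_width, chunk_left_context, chunk_right_context = 4, 2, 2

begin_src = max(0, tgt_start + label_delay - chunk_left_context)       # 0
end_src = tgt_start + label_delay + chunk_width + chunk_right_context  # 6
left_pad = max(0, chunk_left_context - tgt_start - label_delay)        # 2
right_pad = max(0, end_src - src_item.size(0))                         # 0

chunk = src_item[begin_src:end_src]
if left_pad > 0 or right_pad > 0:
    # replicate the first/last frame so every chunk ends up the same length
    chunk = F.pad(chunk.t().unsqueeze(0), (left_pad, right_pad),
                  mode="replicate").squeeze(0).t()
print(chunk.squeeze(1))  # tensor([0., 0., 0., 1., 2., 3., 4., 5.]), length 4 + 2 + 2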
Example No. 22
0
    input = torch.randn((128, 1, 24, 24, 24)).cuda()
    print('input', input.shape)
    out = Conv3d_1(input.shape[1], 256, 5).cuda()(input)
    print('After cov1', out.shape)
    out = PrimaryCapsules(input_shape=(256, 16, 16, 16),
                          capsule_dim=8,
                          out_channels=32,
                          kernel_size=9,
                          stride=2).cuda()(out)
    print('After PrimaryCapsules', out.shape)
    out = Routing().cuda()(out, 2)
    print('After Routing', out.shape)
    score = Norm()(out)
    print('After Norm', score.shape)
    decoder = Decoder(16, int(np.prod((1, 24, 24, 24))),
                      (1, 24, 24, 24)).cuda()
    y = torch.IntTensor(
        np.array([np.random.randint(0, 10) for i in range(128)]))
    reconstruction = decoder(out, y).view((-1, ) + (1, 24, 24, 24))
    print('After reconstruction', reconstruction.shape)
    model = PointCapsNet((1, 24, 24, 24), 3).cuda()
    y_pred, x_reconstruction = model(input, y)
    print('x shape', input.shape)
    print('y shape', y.shape)
    print(y_pred.shape)
    print(x_reconstruction.shape)
    # Draw network structure
    #from torchviz import make_dot
    #draw = make_dot((y_pred, x_reconstruction), params=dict(model.named_parameters()))
    #draw.view()
Example No. 23
0
def test_multi_loss_factory():
    from mmpose.models import build_loss

    # test heatmap loss
    loss_cfg = dict(type='HeatmapLoss')
    loss = build_loss(loss_cfg)

    with pytest.raises(AssertionError):
        fake_pred = torch.zeros((2, 3, 64, 64))
        fake_label = torch.zeros((1, 3, 64, 64))
        fake_mask = torch.zeros((1, 64, 64))
        loss(fake_pred, fake_label, fake_mask)

    fake_pred = torch.zeros((1, 3, 64, 64))
    fake_label = torch.zeros((1, 3, 64, 64))
    fake_mask = torch.zeros((1, 64, 64))
    assert torch.allclose(loss(fake_pred, fake_label, fake_mask),
                          torch.tensor(0.))

    fake_pred = torch.ones((1, 3, 64, 64))
    fake_label = torch.zeros((1, 3, 64, 64))
    fake_mask = torch.zeros((1, 64, 64))
    assert torch.allclose(loss(fake_pred, fake_label, fake_mask),
                          torch.tensor(0.))

    fake_pred = torch.ones((1, 3, 64, 64))
    fake_label = torch.zeros((1, 3, 64, 64))
    fake_mask = torch.ones((1, 64, 64))
    assert torch.allclose(loss(fake_pred, fake_label, fake_mask),
                          torch.tensor(1.))

    # test AE loss
    fake_tags = torch.zeros((1, 18, 1))
    fake_joints = torch.zeros((1, 3, 2, 2), dtype=torch.int)

    loss_cfg = dict(type='AELoss', loss_type='exp')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))
    assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.))

    fake_tags[0, 0, 0] = 1.
    fake_tags[0, 10, 0] = 0.
    fake_joints[0, 0, 0, :] = torch.IntTensor((0, 1))
    fake_joints[0, 0, 1, :] = torch.IntTensor((10, 1))
    loss_cfg = dict(type='AELoss', loss_type='exp')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))
    assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.25))

    fake_tags[0, 0, 0] = 0
    fake_tags[0, 7, 0] = 1.
    fake_tags[0, 17, 0] = 1.
    fake_joints[0, 1, 0, :] = torch.IntTensor((7, 1))
    fake_joints[0, 1, 1, :] = torch.IntTensor((17, 1))

    loss_cfg = dict(type='AELoss', loss_type='exp')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.))

    loss_cfg = dict(type='AELoss', loss_type='max')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))

    with pytest.raises(ValueError):
        loss_cfg = dict(type='AELoss', loss_type='min')
        loss = build_loss(loss_cfg)
        loss(fake_tags, fake_joints)

    # test MultiLossFactory
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=True,
                        push_loss_factor=[0.001],
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=0.001,
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=[0.001],
                        pull_loss_factor=0.001,
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=[0.001],
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=True,
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=[0.001],
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=1.0)
        loss = build_loss(loss_cfg)
    loss_cfg = dict(type='MultiLossFactory',
                    num_joints=17,
                    num_stages=1,
                    ae_loss_type='exp',
                    with_ae_loss=[False],
                    push_loss_factor=[0.001],
                    pull_loss_factor=[0.001],
                    with_heatmaps_loss=[False],
                    heatmaps_loss_factor=[1.0])
    loss = build_loss(loss_cfg)
    fake_outputs = [torch.zeros((1, 34, 64, 64))]
    fake_heatmaps = [torch.zeros((1, 17, 64, 64))]
    fake_masks = [torch.ones((1, 64, 64))]
    fake_joints = [torch.zeros((1, 30, 17, 2))]
    heatmaps_losses, push_losses, pull_losses = \
        loss(fake_outputs, fake_heatmaps, fake_masks, fake_joints)
    assert heatmaps_losses == [None]
    assert pull_losses == [None]
    assert push_losses == [None]
    loss_cfg = dict(type='MultiLossFactory',
                    num_joints=17,
                    num_stages=1,
                    ae_loss_type='exp',
                    with_ae_loss=[True],
                    push_loss_factor=[0.001],
                    pull_loss_factor=[0.001],
                    with_heatmaps_loss=[True],
                    heatmaps_loss_factor=[1.0])
    loss = build_loss(loss_cfg)
    heatmaps_losses, push_losses, pull_losses = \
        loss(fake_outputs, fake_heatmaps, fake_masks, fake_joints)
    assert len(heatmaps_losses) == 1
Example No. 24
0
    def forward(self, inputs: dict) -> Union[dict, tuple]:
        """
        `inputs` is a dict containing the keys below. The format of the tensors
        is indicated as e.g. `BTC`, `BMC` (etc.), which can be interpreted as
        follows.
            B: batch size,
            T: size of the rolling window over health history (i.e. number of
               time-stamps),
            C: number of generic channels,
            M: number of encounters,
        Elements with pre-determined shapes are indicated as such.
        For example:
            - B(14) indicates a tensor of shape (B, 14),
            - BM1 indicates a tensor of shape (B, M, 1)
            - B(T=14)C indicates a tensor of shape (B, 14, C) where 14
                is the currently set size of the rolling window.

        Parameters
        ----------
        inputs : dict
            A python dict with the following keys:
                -> `health_history`: a B(T=14)C tensor of the 14-day health
                    history (symptoms + test results + day) of the individual.
                -> `health_profile`: a BC tensor of the health profile
                    containing (age + health + preexisting_conditions) of the
                    individual.
                -> `history_days`: a B(T=14)1 tensor of the day corresponding to the
                    T dimension in `health_history`.
                -> `encounter_health`: a BMC tensor of health during an
                    encounter indexed by M.
                -> `encounter_message`: a BMC tensor of the received
                    message from the encounter partner.
                -> `encounter_day`: a BM1 tensor of the encounter day.
                -> `encounter_duration`: a BM1 tensor of the encounter duration.
                    This is not the actual duration, but a proxy (for the number
                    of encounters)
                -> `encounter_partner_id`: a binary BMC tensor specifying
                    the ID of the encounter partner.
                -> `mask`: a BM mask tensor distinguishing the valid entries (1)
                    from padding (0) in the set-valued inputs.
                -> `valid_history_mask`: a B(14) mask tensor distinguishing valid
                    points in history (1) from padding (0).
        Returns
        -------
        dict
            A dict containing the keys "encounter_variables" and "latent_variable".
        """
        # -------- Shape Wrangling --------
        batch_size = inputs["health_history"].shape[0]
        num_history_days = inputs["health_history"].shape[1]
        num_encounters = inputs["encounter_health"].shape[1]
        if not isinstance(num_encounters, torch.Tensor):  # for tracing
            # noinspection PyArgumentList
            num_encounters = torch.IntTensor([num_encounters])[0]
        # -------- Embeddings --------
        embeddings = self.embed(inputs)
        # -------- Self Attention --------
        # Prepare the entities -- one set for the encounters and the other for self health
        # Before we start, expand health profile from BC to BMC and append to entities
        expanded_health_profile_per_encounter = embeddings[
            "embedded_health_profile"][:, None, :].expand(
                batch_size, num_encounters,
                embeddings["embedded_health_profile"].shape[-1])
        encounter_entities = torch.cat(
            [
                embeddings["embedded_encounter_day"],
                embeddings["embedded_encounter_partner_ids"],
                embeddings["embedded_encounter_duration"],
                embeddings["embedded_encounter_health"],
                embeddings["embedded_encounter_messages"],
                expanded_health_profile_per_encounter,
            ],
            dim=-1,
        )
        # Expand the messages and placeholders from C to BTC
        expanded_message_placeholder = self.message_placeholder[
            None, None].expand(
                batch_size,
                num_history_days,
                embeddings["embedded_encounter_messages"].shape[-1],
            )
        expanded_pid_placeholder = self.partner_id_placeholder[
            None, None].expand(
                batch_size,
                num_history_days,
                embeddings["embedded_encounter_partner_ids"].shape[-1],
            )
        expanded_duration_placeholder = self.duration_placeholder[
            None, None].expand(
                batch_size,
                num_history_days,
                embeddings["embedded_encounter_duration"].shape[-1],
            )
        # Expand the health profile from C to BTC
        expanded_health_profile_per_day = embeddings[
            "embedded_health_profile"][:, None, :].expand(
                batch_size,
                num_history_days,
                embeddings["embedded_health_profile"].shape[-1],
            )
        self_entities = torch.cat(
            [
                embeddings["embedded_history_days"],
                expanded_pid_placeholder,
                expanded_duration_placeholder,
                embeddings["embedded_health_history"],
                expanded_message_placeholder,
                expanded_health_profile_per_day,
            ],
            dim=-1,
        )
        # Concatenate encounter and self entities in to one big set (before passing to
        # the self attention blocks). In addition, expand inputs.mask to account for
        # masking the entire set of entities.
        entities = torch.cat([encounter_entities, self_entities], dim=1)
        expanded_mask = torch.cat(
            [inputs["mask"], inputs["valid_history_mask"]], dim=1)
        entities = self.entity_masker(entities, expanded_mask)
        # Grab a copy of the "meta-data", which we will be appending to entities at
        # every step. These meta-data are the time-stamps and partner_ids
        meta_data = self._get_embedding_meta_data(
            entities,
            embeddings["embedded_history_days"],
            embeddings["embedded_encounter_partner_ids"],
            embeddings["embedded_encounter_duration"],
        )
        # Make a mask for the attention mech. This mask prevents attention between
        # two entities if either one of them is a padding entity.
        attention_mask = expanded_mask[:, :, None] * expanded_mask[:, None, :]
        entities = self._attention_loop(entities, meta_data, attention_mask,
                                        expanded_mask)
        # -------- Latent Variables
        pre_latent_variable = self._get_pre_latent_variable(
            entities, num_encounters)
        # Push through the latent variable MLP to get the latent variables
        # latent_variable.shape = BTC
        if not isinstance(self.latent_variable_mlp, nn.ModuleDict):
            latent_variable_mlps = {
                "latent_variable": self.latent_variable_mlp
            }
        else:
            latent_variable_mlps = self.latent_variable_mlp
        latent_variables = {
            key: mlp(pre_latent_variable)
            for key, mlp in latent_variable_mlps.items()
        }
        # -------- Generate Output Variables --------
        # Process encounters to their variables
        pre_encounter_variables = self._get_pre_encounter_variables(
            entities,
            embeddings["embedded_history_days"],
            embeddings["embedded_encounter_partner_ids"],
            embeddings["embedded_encounter_duration"],
            num_encounters,
        )
        encounter_variables = self.encounter_mlp(pre_encounter_variables)
        # Done: pack to an addict and return
        assert (not self._diagnose or not self._output_as_tuple
                ), "cannot produce tuple (for tracing) while diagnosing"
        # If legacy code expects a tuple somewhere, we only give out the first
        # latent variable.
        if self._output_as_tuple:
            return encounter_variables, latent_variables["latent_variable"]
        results = dict()
        results["encounter_variables"] = encounter_variables
        # This is still compatible with legacy code that expects a
        # "latent_variable" entry.
        results.update(latent_variables)
        if self._diagnose:
            _locals = dict(locals())
            _locals.pop("results")
            _locals.pop("self")
            _locals.pop("encounter_variables")
            _locals.pop("latent_variable")
            results.update(_locals)
        return results
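For readers who want to poke at the shape conventions described in the docstring above, the following is a minimal sketch of an `inputs` dict with the documented keys (B=2, T=14, M=5; the channel count C is an illustrative placeholder, not the model's real feature size):

import torch

B, T, M, C = 2, 14, 5, 8  # C is a placeholder channel count, not the real one
inputs = {
    "health_history": torch.randn(B, T, C),        # B(T=14)C
    "health_profile": torch.randn(B, C),           # BC
    "history_days": torch.arange(T, dtype=torch.float32).view(1, T, 1).expand(B, T, 1),  # B(T=14)1
    "encounter_health": torch.randn(B, M, C),      # BMC
    "encounter_message": torch.randn(B, M, C),     # BMC
    "encounter_day": torch.zeros(B, M, 1),         # BM1
    "encounter_duration": torch.ones(B, M, 1),     # BM1
    "encounter_partner_id": torch.zeros(B, M, C),  # binary BMC
    "mask": torch.ones(B, M),                      # BM
    "valid_history_mask": torch.ones(B, T),        # B(14)
}
for key, value in inputs.items():
    print(f"{key:25s} {tuple(value.shape)}")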
Example No. 25
0
    def __init__(self, cfgFile):
        super(DarknetTorch, self).__init__()
        self.sections = parse_Darknet_cfg(cfgFile)  # list of dictionaries
        self.moduleList = module_define_torch(self.sections)
        self.header = torch.IntTensor([0, 0, 0, 0])
        self.netparams = self.sections[0]  # dictionary defining net params
Example No. 26
0
def test_dtype(workers):
    alice, bob, james, me = (workers["alice"], workers["bob"], workers["james"], workers["me"])
    # Without fix_prec
    x = torch.tensor([1, 2, 3]).share(alice, bob, james, dtype="long")
    assert (
        x.child.dtype == "long"
        and x.child.field == 2 ** 64
        and isinstance(
            x.child.child["alice"].location.object_store.get_obj(
                x.child.child["alice"].id_at_location
            ),
            torch.LongTensor,
        )
        and (x.get() == torch.LongTensor([1, 2, 3])).all()
    )

    x = torch.tensor([4, 5, 6]).share(alice, bob, james, dtype="int")
    assert (
        x.child.dtype == "int"
        and x.child.field == 2 ** 32
        and isinstance(
            x.child.child["alice"].location.object_store.get_obj(
                x.child.child["alice"].id_at_location
            ),
            torch.IntTensor,
        )
        and (x.get() == torch.IntTensor([4, 5, 6])).all()
    )

    # With dtype custom
    x = torch.tensor([1, 2, 3]).share(alice, bob, james, dtype="custom", field=67)
    assert (
        x.child.dtype == "custom"
        and x.child.field == 67
        and isinstance(
            x.child.child["alice"].location.object_store.get_obj(
                x.child.child["alice"].id_at_location
            ),
            torch.IntTensor,
        )
        and (x.get() == torch.IntTensor([1, 2, 3])).all()
    )

    # With fix_prec
    x = torch.tensor([1.1, 2.2, 3.3]).fix_prec().share(alice, bob, james)
    assert (
        x.child.child.dtype == "long"
        and x.child.child.field == 2 ** 64
        and isinstance(
            x.child.child.child["alice"].location.object_store.get_obj(
                x.child.child.child["alice"].id_at_location
            ),
            torch.LongTensor,
        )
        and (x.get().float_prec() == torch.tensor([1.1, 2.2, 3.3])).all()
    )

    x = torch.tensor([4.1, 5.2, 6.3]).fix_prec(dtype="int").share(alice, bob, james)
    assert (
        x.child.child.dtype == "int"
        and x.child.child.field == 2 ** 32
        and isinstance(
            x.child.child.child["alice"].location.object_store.get_obj(
                x.child.child.child["alice"].id_at_location
            ),
            torch.IntTensor,
        )
        and (x.get().float_prec() == torch.tensor([4.1, 5.2, 6.3])).all()
    )
Example No. 27
0
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# Build the network model
cnn = chsNet(1, len(alphabet) + 1)
cnn.apply(weights_init)
if cnn_data != '':
    print('loading pretrained model from %s' % cnn_data)
    cnn.load_state_dict({k.replace('module.', ''): v for k, v in torch.load(cnn_data).items()})

image = torch.FloatTensor(batchSize, 1, imgH, imgW)  # 3
text = torch.IntTensor(batchSize * 5)
length = torch.IntTensor(batchSize)

if torch.cuda.is_available():
    cnn = cnn.cuda()
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

optimizer = optim.RMSprop(cnn.parameters(), lr=lr)
Example No. 28
0
    for filename in findFiles('data/names/*.txt'):
        category = filename.split('/')[-1].split('.')[0]
        all_categories.append(category)
        lines = readLines(filename)
        category_lines[category] = lines

    ######## LSTM Configuration
    num_classes = 18
    input_size = 57
    hidden_size = 57
    num_layers = 1

    mini_batch = 1
    seq_length = 20

    hidden_size_tensor = torch.autograd.Variable(
        torch.IntTensor([hidden_size]), requires_grad=False)
    mini_batch_tensor = torch.autograd.Variable(
        torch.IntTensor([mini_batch]), requires_grad=False)
    seq_length_tensor = torch.autograd.Variable(
        torch.IntTensor([seq_length]), requires_grad=False)
    num_layer_tensor = torch.autograd.Variable(
        torch.IntTensor([num_layers]), requires_grad=False)

    rnn = LSTM().cuda()
    net_dict = rnn.state_dict()
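    # `dict` below is assumed to be the pretrained checkpoint state dict loaded in the truncated part above.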
    pretrained_dict = {k: v for k, v in dict.items() if k in net_dict}
    net_dict.update(pretrained_dict)
    rnn.load_state_dict(net_dict)

    ############ Test
Example No. 29
0
def demo(opt):
    """ Model Configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=0,  # In Linux use int(opt.workers), in Windows 0
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probabilty (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index, preds_size)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probabilty (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            log = open(f'./log_demo_result.txt', 'a')
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= multiply of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

            log.close()
Example No. 30
0
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)
        self.header = torch.IntTensor([0, 0, 0, 0])
        self.seen = 0