def get_matched_boxes(images, othreshold, cthreshold, target):
    _, T_feature, out_T = T_model(images)
    out_T_test = detect(out_T[0], softmax(out_T[1]), out_T[2])  # [1, num_cls, top_k, 5]
    # print(out_T_test)
    T_ROI, T_score = ROI_gen(out_T_test)
    T_feature = ChannelPool(T_feature)
    out7_T = roi_pooling_2d(Variable(T_feature, requires_grad=False),
                            Variable(T_ROI, requires_grad=False),
                            output_size=(7, 7), spatial_scale=8)
    input_d_T = Variable(transform(out7_T), requires_grad=False)
    cls_match, loc_match, mask, prev_loc, num_match = match_process(out_T_test, target, othreshold, cthreshold,
                                                                    device)

    if num_match == 0:
        raise Exception("no boxes matched")

    out_T_test_cls = []  # [match] class label for each matched row of out_T_test
    j = -1
    for i in range(sum(mask)):
        # walk the flattened mask and recover the class index of each matched box;
        # starting the search at j + 1 = 0 ensures a match at index 0 is not skipped
        j = list(mask).index(1, j + 1)
        k = j // top_k - 1
        out_T_test_cls.append(k)
    cls_match = cls_match.view(-1, 1)
    cls_match = cls_match.long()
    cls_match = cls_match.squeeze(1)[mask]     # [match]
    loc_match = loc_match.view(-1, 4)[mask]    # [match, 4]
    out_T_test = out_T_test.view(-1, 5)[mask]  # [match, 5]
    feature_t = input_d_T.view(-1, input_d_T.size(-3), input_d_T.size(-2), input_d_T.size(-1))[mask]
    _, d_cls_t, d_loc_t = discriminator(feature_t)  # [match, 21], [match, 4]
    d_cls_t = softmax(d_cls_t)
    d_conf, d_cls = d_cls_t.max(1, keepdim=True)
    d_cls = d_cls.squeeze(1)  # [match]
    d_loc = d_loc_t + out_T_test[:, 1:]
    d_loc[d_loc < 0] = 0
    d_loc[d_loc > 1] = 1
    return d_conf, d_cls, d_loc, cls_match, loc_match, out_T_test, out_T_test_cls
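
# A minimal, hypothetical usage sketch (not part of the original file): it assumes
# the globals initialised further down (T_model, discriminator, detect, transform,
# ROI_gen, ChannelPool, top_k, device) are already set up, and that `images` and
# `target` come from a VOC-style loader with batch size 1.
def _example_get_matched_boxes(images, target):
    images = images.to(device)  # expected shape [1, 3, 300, 300]
    try:
        d_conf, d_cls, d_loc, cls_match, loc_match, boxes, boxes_cls = \
            get_matched_boxes(images, othreshold=0.3, cthreshold=0.3, target=target)
    except Exception:
        return None  # no detection overlapped the ground truth closely enough
    return d_cls, d_loc, cls_match, loc_match
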
def transform_input(input_text, LengthRatioProcessor, LevenshteinPreprocessor,
                    WordRankRatioPreprocessor, SentencePiecePreprocessor):
    print("Computing ", LengthRatioProcessor)
    output_text = transform.transform(input_text, LengthRatioProcessor,
                                      LevenshteinPreprocessor,
                                      WordRankRatioPreprocessor,
                                      SentencePiecePreprocessor)
    return output_text
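
# Hypothetical call (the concrete preprocessor values below are illustrative only;
# the real ones depend on how the global `transform` object was configured):
#
#   simplified = transform_input("The quick brown fox jumps over the lazy dog.",
#                                0.95, 0.75, 0.75, None)
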
Example #3
def pixar():
    try:
        pixify_image = request.args.get("pixify")

        # treat any value other than the literal string "False" as True
        pixify_image = pixify_image != "False"

        print(request.files)

        img_file = request.files["file"]

        filename = img_file.filename
        filename = rename(filename)

        if not os.path.exists("./images"):
            os.mkdir("./images")

        img_file.save(os.path.join("./images", filename))

    except Exception as error:
        print(error)
        return abort(400)

    else:
        res_path = transform(
            models, filename, pixify_image, delete_input=True, delete_intermediate=True
        )
        if res_path == "-1":
            return abort(404)

        with open(res_path, "rb") as file:
            encoded = base64.b64encode(file.read())

        return encoded
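
# A hypothetical registration sketch, assuming this handler lives in a Flask app
# (the route path and the `app` object are illustrative, not from the source):
#
#   from flask import Flask
#   app = Flask(__name__)
#   app.add_url_rule("/pixar", view_func=pixar, methods=["POST"])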
Example #4
 def getUp(self, iFrame, iStep=0):
     mtx = self.getTransform(iFrame, iStep)
     up = self.getInitCameraUp()
     o = numpy.array([0, 0, 0], dtype=numpy.double)
     newUp = Transform.transform(mtx, up) - Transform.transform(mtx, o)
     return newUp
Example #5
 def getPosition(self, iFrame, iStep=0):
     mtx = self.getTransform(iFrame, iStep)
     pt = self.getInitPosition()
     return Transform.transform(mtx, pt)
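
# Hypothetical usage (the `camera` object and frame indices are illustrative; the
# class these methods belong to is not shown in this snippet):
#
#   for frame in range(0, 100, 10):
#       eye = camera.getPosition(frame)
#       up = camera.getUp(frame)
#       print(frame, eye, up)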
device, device_ids = prepare_device(devices)
othreshold = 0.3
cthreshold = 0.3
try:
    testset = VOCDetection('/home/share/Dataset/VOCdevkit/', [('2007', 'trainval')], None, VOCAnnotationTransform())
except FileNotFoundError:
    testset = VOCDetection('/home/hanchengye/data/VOCdevkit/', [('2007', 'trainval')], None, VOCAnnotationTransform())
T_model = torch.load('model/ssd300_VOC_2.pkl', map_location={'cuda:1':str(device)})
T_model = T_model.to(device)
discriminator = torch.load('model/ssdd300_VOC_2.pkl', map_location={'cuda:1':str(device)})
# param_D = torch.load('Root')
# discriminator.load_state_dict(param_D)
discriminator = discriminator.to(device)
detect = Detect(21, 0, top_k, 0.01, 0.45, device)
softmax = nn.Softmax(dim=-1)
transform = transform(1, 21)
ROI_gen = roi_gen()
ChannelPool = ChannelPool(64)
Tensor = torch.cuda.FloatTensor


def get_processed_img(img_id):
    """
    :param img_id: 图像的id
    :return: T_model的input, 给plt的input, target: [num_obj, loc+cls]
    """
    image = testset.pull_image(img_id)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    _, ann = testset.pull_anno(img_id)
    x = cv2.resize(image, (300, 300)).astype(np.float32)
    x -= (104.0, 117.0, 123.0)
    # The original snippet is truncated here; a typical completion (assumed,
    # following the stock ssd.pytorch demo) converts to a CHW tensor and
    # returns the annotation alongside the RGB image:
    x = torch.from_numpy(x[:, :, ::-1].copy()).permute(2, 0, 1)
    return x, rgb_image, np.array(ann, dtype=np.float32)
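
# Hypothetical end-to-end check (assumes get_processed_img returns the T_model
# input tensor, the RGB image and the target, as its docstring describes, and
# that the globals T_model, detect, softmax and device are already initialised):
def _example_forward(img_id=0):
    x, rgb_image, target = get_processed_img(img_id)
    images = x.unsqueeze(0).to(device)               # add batch dimension
    _, feature, out = T_model(images)
    return detect(out[0], softmax(out[1]), out[2])   # [1, num_cls, top_k, 5]
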
def main(argv):
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_video',  help='Input video',    required=True)
    parser.add_argument('-o', '--output_video',  help='Output video',    required=True)
    parser.add_argument('-oc', '--output_comparer_video',  help='Output comparer video',    required=True)
    parser.add_argument('-kp_t', '--kp_threshold',  help='Threshold used in key point selection',    required=True)
    parser.add_argument('-d',  '--debug',        help='Debugging mode', action='store_true')
    ARGS = parser.parse_args()

    global DEBUG
    DEBUG = ARGS.debug

    cap = cv2.VideoCapture(ARGS.input_video)
    video_out = vutils.get_write_instance(ARGS.output_video, cap)

    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)   # float
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    video_comp_out = vutils.get_write_instance(ARGS.output_comparer_video, cap, (2*width, height))

    if cap.isOpened():
        ret, frame = cap.read()
        kp0, des0 = f2d.SIFT(frame)

        p = np.indices(frame.shape[:2]).swapaxes(0,2).reshape((np.prod(frame.shape[:2]),2), order = 'F')

        i = 2
        
        t_acc = 0
        while True:
            start = time()
            ret, frame = cap.read()

            if not ret:
                break

            kp1, des1 = f2d.SIFT(frame)
            matches = d_match.kp_matcher(des0, des1, float(ARGS.kp_threshold))
            print("kps:",kp1.shape)

            y = kp0[matches[matches < len(kp0)]]
            x = kp1[matches < len(kp0)]
            print("matches:",x.shape)

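            # Estimate an affine motion model between the matched keypoints with
            # RANSAC and warp the current frame with it; the last three arguments
            # are assumed to be max iterations, error tolerance and required inliers.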
            model, _ = ransac(x, y, AffineTransformationModel(), 100, 1.0, int(x.shape[0] * 0.8))
            img = transform(frame, model, p)

            img = img.astype(np.uint8)
            grid_image = vutils.create_grid(frame, img).astype(np.uint8)

            if DEBUG:
                cv2.imwrite('./output/corrected/frame%d.png' % (i), img)
                cv2.imwrite('./output/original/_frame%d.png' % (i), frame)
                cv2.imwrite('./output/composed/grid%d.png' % (i), grid_image)

            video_out.write(img)
            video_comp_out.write(grid_image)

            kp0 = model.predict(kp1)
            des0 = des1
            end = time()
            t_acc += end - start
            print("Frame %04d/%04d\tProc. time: %.2fs\tTotal time: %dm%.2ds" % (i, length, end - start, t_acc // 60, round(t_acc % 60)))
            i += 1

        

        cap.release()
        video_out.release()
        video_comp_out.release()
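
# Assumed entry point (not shown in the original snippet); argparse reads
# sys.argv itself, so the argv argument is effectively unused by main():
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])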
Example #8
                                       shuffle=True,
                                       collate_fn=detection_collate,
                                       pin_memory=True)

    param_T = torch.load('weights/ssd300_COCO_395000.pth')
    T_model.load_state_dict(param_T)
    T_model = T_model.to(device)

    total_param, _ = get_parameter_number(T_model)

    discriminator = Discriminator(Channel, voc['num_classes'])
    discriminator = discriminator.to(device)

    softmax = nn.Softmax(dim=-1).to(device)
    detect = Detect(21, 0, top_k, 0.01, 0.45, device)
    transform = transform(Batch_size, voc['num_classes']).to(device)
    roi_extract = roi_extract(Channel, device).to(device)

    criterion = MultiBoxLoss(21, 0.5, True, 0, True, 3, 0.5, False,
                             True).to(device)

    iteration = 0
    rate = 1.0
    while rate > 0.5:

        vis_title = 'SSD.PyTorch on ' + dataset.name + ' KDGAN_1F1D ' + str(
            rate)
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot(
            'Iteration', 'Loss(' + str(Batch_size) + '_' + str(Channel) + '_' +
            str(Epoch) + '_v2)', vis_title, vis_legend)