def get_matched_boxes(images, othreshold, cthreshold, target):
    # Run the teacher model and decode its raw outputs into detections.
    _, T_feature, out_T = T_model(images)
    out_T_test = detect(out_T[0], softmax(out_T[1]), out_T[2])  # [1, num_cls, top_k, 5]
    # print(out_T_test)
    T_ROI, T_score = ROI_gen(out_T_test)
    T_feature = ChannelPool(T_feature)
    out7_T = roi_pooling_2d(Variable(T_feature, requires_grad=False),
                            Variable(T_ROI, requires_grad=False),
                            output_size=(7, 7), spatial_scale=8)
    input_d_T = Variable(transform(out7_T), requires_grad=False)

    # Match decoded detections against the ground-truth targets.
    cls_match, loc_match, mask, prev_loc, num_match = match_process(
        out_T_test, target, othreshold, cthreshold, device)
    if num_match == 0:
        raise Exception("no boxes matched")

    # Class label for each matched detection in out_T_test. Detections are laid
    # out as [num_cls, top_k], so a flat index maps to class idx // top_k - 1.
    # (The original loop started its index search at 1 and could skip a match
    # at position 0.)
    out_T_test_cls = [idx // top_k - 1 for idx, m in enumerate(mask) if m]  # [match]

    cls_match = cls_match.view(-1, 1).long().squeeze(1)[mask]  # [match]
    loc_match = loc_match.view(-1, 4)[mask]                    # [match, 4]
    out_T_test = out_T_test.view(-1, 5)[mask]                  # [match, 5]
    feature_t = input_d_T.view(-1, input_d_T.size(-3),
                               input_d_T.size(-2), input_d_T.size(-1))[mask]

    # Score the matched ROI features with the discriminator.
    _, d_cls_t, d_loc_t = discriminator(feature_t)  # [match, 21], [match, 4]
    d_cls_t = softmax(d_cls_t)
    d_conf, d_cls = d_cls_t.max(1, keepdim=True)
    d_cls = d_cls.squeeze(1)  # [match]

    # Refine the teacher boxes with the predicted offsets and clamp to [0, 1].
    d_loc = d_loc_t + out_T_test[:, 1:]
    d_loc[d_loc < 0] = 0
    d_loc[d_loc > 1] = 1
    return d_conf, d_cls, d_loc, cls_match, loc_match, out_T_test, out_T_test_cls
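# Usage sketch (not from the original source): assumes a VOC-style data loader
# yielding (images, targets) batches and that the globals used above (T_model,
# discriminator, detect, device, ...) are already initialized. `data_loader`
# is a hypothetical name.
def demo_get_matched_boxes(data_loader):
    images, target = next(iter(data_loader))
    d_conf, d_cls, d_loc, cls_match, loc_match, boxes, box_cls = get_matched_boxes(
        images.to(device), othreshold=0.3, cthreshold=0.3, target=target)
    print("matched boxes:", boxes.size(0))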
def transform_input(input_text, LengthRatioProcessor, LevenshteinPreprocessor,
                    WordRankRatioPreprocessor, SentencePiecePreprocessor):
    print("Computing ", LengthRatioProcessor)
    # Forward the text and the four preprocessor settings to the transform module.
    output_text = transform.transform(input_text, LengthRatioProcessor,
                                      LevenshteinPreprocessor,
                                      WordRankRatioPreprocessor,
                                      SentencePiecePreprocessor)
    return output_text
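# Usage sketch: the four settings are forwarded verbatim to transform.transform().
# All values below are hypothetical placeholders; the concrete types they need
# depend on the transform module, which is not shown here.
length_ratio = 0.95   # hypothetical target length ratio
levenshtein = 0.75    # hypothetical Levenshtein similarity setting
word_rank = 0.75      # hypothetical word-rank ratio setting
sentencepiece = None  # hypothetical SentencePiece setting
simplified = transform_input("Some complex sentence to simplify.",
                             length_ratio, levenshtein, word_rank, sentencepiece)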
def pixar():
    try:
        # The "pixify" query parameter defaults to True unless it is the
        # literal string "False".
        pixify_image = request.args.get("pixify") != "False"
        print(request.files)
        img_file = request.files["file"]
        filename = rename(img_file.filename)
        if not os.path.exists("./images"):
            os.mkdir("./images")
        img_file.save(os.path.join("./images", filename))
    except Exception as error:
        print(error)
        return abort(400)
    else:
        res_path = transform(
            models, filename, pixify_image,
            delete_input=True, delete_intermediate=True
        )
        if res_path == "-1":
            return abort(404)
        with open(res_path, "rb") as file:
            encoded = base64.b64encode(file.read())
        return encoded
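# Client-side sketch (assumptions: the route above is registered as POST /pixar
# on a local Flask server and the response body is the base64-encoded result
# image; the URL and file names are hypothetical).
import base64
import requests

resp = requests.post("http://localhost:5000/pixar",
                     params={"pixify": "True"},
                     files={"file": open("portrait.jpg", "rb")})
if resp.ok:
    with open("pixar_portrait.jpg", "wb") as out:
        out.write(base64.b64decode(resp.content))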
def getUp(self, iFrame, iStep=0):
    mtx = self.getTransform(iFrame, iStep)
    up = self.getInitCameraUp()
    o = numpy.array([0, 0, 0], dtype=numpy.double)
    # Transform the up vector as a direction: subtracting the transformed
    # origin removes the translation component of the matrix.
    newUp = Transform.transform(mtx, up) - Transform.transform(mtx, o)
    return newUp
def getPosition(self, iFrame, iStep=0):
    mtx = self.getTransform(iFrame, iStep)
    pt = self.getInitPosition()
    return Transform.transform(mtx, pt)
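# Usage sketch (hypothetical `camera` object exposing the two methods above):
# sample the camera path and its orientation over the first few frames.
for frame in range(10):
    pos = camera.getPosition(frame)
    up = camera.getUp(frame)
    print(frame, pos, up / numpy.linalg.norm(up))  # normalized up direction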
device, device_ids = prepare_device(devices)
othreshold = 0.3
cthreshold = 0.3

try:
    testset = VOCDetection('/home/share/Dataset/VOCdevkit/', [('2007', 'trainval')],
                           None, VOCAnnotationTransform())
except FileNotFoundError:
    testset = VOCDetection('/home/hanchengye/data/VOCdevkit/', [('2007', 'trainval')],
                           None, VOCAnnotationTransform())

T_model = torch.load('model/ssd300_VOC_2.pkl', map_location={'cuda:1': str(device)})
T_model = T_model.to(device)
discriminator = torch.load('model/ssdd300_VOC_2.pkl', map_location={'cuda:1': str(device)})
# param_D = torch.load('Root')
# discriminator.load_state_dict(param_D)
discriminator = discriminator.to(device)

detect = Detect(21, 0, top_k, 0.01, 0.45, device)
softmax = nn.Softmax(dim=-1)
transform = transform(1, 21)
ROI_gen = roi_gen()
ChannelPool = ChannelPool(64)
Tensor = torch.cuda.FloatTensor


def get_processed_img(img_id):
    """
    :param img_id: image id in the dataset
    :return: input for T_model, input for plt, target: [num_obj, loc+cls]
    """
    image = testset.pull_image(img_id)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    _, ann = testset.pull_anno(img_id)
    x = cv2.resize(image, (300, 300)).astype(np.float32)
    x -= (104.0, 117.0, 123.0)  # subtract the VOC BGR channel means
def main(argv):
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_video', help='Input video', required=True)
    parser.add_argument('-o', '--output_video', help='Output video', required=True)
    parser.add_argument('-oc', '--output_comparer_video', help='Output comparer video', required=True)
    parser.add_argument('-kp_t', '--kp_threshold', help='Threshold used in key point selection', required=True)
    parser.add_argument('-d', '--debug', help='Debugging mode', action='store_true')
    ARGS = parser.parse_args()

    global DEBUG
    DEBUG = ARGS.debug

    cap = cv2.VideoCapture(ARGS.input_video)
    video_out = vutils.get_write_instance(ARGS.output_video, cap)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)    # float
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_comp_out = vutils.get_write_instance(ARGS.output_comparer_video, cap, (2 * width, height))

    if cap.isOpened():
        ret, frame = cap.read()
        kp0, des0 = f2d.SIFT(frame)
        # All pixel coordinates of a frame, as a [num_pixels, 2] array.
        p = np.indices(frame.shape[:2]).swapaxes(0, 2).reshape(
            (np.prod(frame.shape[:2]), 2), order='F')

    i = 2
    t_acc = 0
    while True:
        start = time()
        ret, frame = cap.read()
        if not ret:
            break
        kp1, des1 = f2d.SIFT(frame)
        matches = d_match.kp_matcher(des0, des1, float(ARGS.kp_threshold))
        print("kps:", kp1.shape)
        y = kp0[matches[matches < len(kp0)]]
        x = kp1[matches < len(kp0)]
        print("matches:", x.shape)
        # Fit an affine model to the matched key points with RANSAC, then warp
        # the frame so it aligns with the previous one.
        model, _ = ransac(x, y, AffineTransformationModel(), 100, 1.0, int(x.shape[0] * 0.8))
        img = transform(frame, model, p)
        img = img.astype(np.uint8)
        grid_image = vutils.create_grid(frame, img).astype(np.uint8)
        if DEBUG:
            cv2.imwrite('./output/corrected/frame%d.png' % i, img)
            cv2.imwrite('./output/original/_frame%d.png' % i, frame)
            cv2.imwrite('./output/composed/grid%d.png' % i, grid_image)
        video_out.write(img)
        video_comp_out.write(grid_image)
        kp0 = model.predict(kp1)
        des0 = des1
        end = time()
        t_acc += end - start
        print("Frame %04d/%04d\tProc. time: %.2fs\tTotal time: %dm%.2ds"
              % (i, length, end - start, t_acc // 60, round(t_acc % 60)))
        i += 1

    cap.release()
    video_out.release()
    video_comp_out.release()
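# Entry-point sketch (assumption: this script is saved as stabilize.py; the
# file names and threshold value below are hypothetical):
#   python stabilize.py -i shaky.mp4 -o stable.mp4 -oc side_by_side.mp4 -kp_t 0.7
import sys

if __name__ == '__main__':
    main(sys.argv[1:])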
                         shuffle=True, collate_fn=detection_collate, pin_memory=True)

param_T = torch.load('weights/ssd300_COCO_395000.pth')
T_model.load_state_dict(param_T)
T_model = T_model.to(device)
total_param, _ = get_parameter_number(T_model)

discriminator = Discriminator(Channel, voc['num_classes'])
discriminator = discriminator.to(device)

softmax = nn.Softmax(dim=-1).to(device)
detect = Detect(21, 0, top_k, 0.01, 0.45, device)
transform = transform(Batch_size, voc['num_classes']).to(device)
roi_extract = roi_extract(Channel, device).to(device)
criterion = MultiBoxLoss(21, 0.5, True, 0, True, 3, 0.5, False, True).to(device)

iteration = 0
rate = 1.0
while rate > 0.5:
    vis_title = 'SSD.PyTorch on ' + dataset.name + 'KDGAN_1F1D ' + str(rate)
    vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
    iter_plot = create_vis_plot(
        'Iteration',
        'Loss(' + str(Batch_size) + '_' + str(Channel) + '_' + str(Epoch) + '_v2',
        vis_title, vis_legend)