def filter_images_with_only_crowd_annotations(dataset_dicts):
    """
    Filter out images with no annotations or only crowd annotations
    (i.e., images without non-crowd annotations).
    A common training-time preprocessing on the COCO dataset.

    Args:
        dataset_dicts (list[dict]): annotations in dl_lib Dataset format.

    Returns:
        list[dict]: the same format, but filtered.
    """
    num_before = len(dataset_dicts)

    def valid(anns):
        for ann in anns:
            if ann.get("iscrowd", 0) == 0:
                return True
        return False

    dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
    num_after = len(dataset_dicts)
    logger.info(
        "Removed {} images with no usable annotations. {} images left.".format(
            num_before - num_after, num_after
        )
    )
    return dataset_dicts
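# A minimal usage sketch for the filter above (the records are illustrative,
# not real data; assumes this module's logger is configured):
_dicts = [
    {"file_name": "a.jpg", "annotations": [{"iscrowd": 0, "bbox": [0, 0, 10, 10]}]},
    {"file_name": "b.jpg", "annotations": [{"iscrowd": 1, "bbox": [5, 5, 20, 20]}]},
]
# only "a.jpg" survives: "b.jpg" has crowd-only annotations
_dicts = filter_images_with_only_crowd_annotations(_dicts)
assert len(_dicts) == 1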
def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image):
    """
    Filter out images with too few keypoints.

    Args:
        dataset_dicts (list[dict]): annotations in dl_lib Dataset format.
        min_keypoints_per_image (int): minimum number of visible keypoints
            an image must contain to be kept.

    Returns:
        list[dict]: the same format as dataset_dicts, but filtered.
    """
    num_before = len(dataset_dicts)

    def visible_keypoints_in_image(dic):
        # Each "keypoints" field has the format [x1, y1, v1, ...], where v is visibility
        annotations = dic["annotations"]
        return sum(
            (np.array(ann["keypoints"][2::3]) > 0).sum()
            for ann in annotations
            if "keypoints" in ann
        )

    dataset_dicts = [
        x for x in dataset_dicts
        if visible_keypoints_in_image(x) >= min_keypoints_per_image
    ]
    num_after = len(dataset_dicts)
    logger.info(
        "Removed {} images with fewer than {} keypoints.".format(
            num_before - num_after, min_keypoints_per_image
        )
    )
    return dataset_dicts
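# A quick check of the COCO keypoint layout the filter relies on: triplets
# [x, y, v] where v > 0 means the point is labeled (values below are made up):
import numpy as np

kps = [10, 20, 2, 0, 0, 0, 30, 40, 1]  # 3 keypoints: visible, missing, labeled
visible = (np.array(kps[2::3]) > 0).sum()
assert visible == 2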
def broadcast_txt_msg(self, msg):
    if self.has_connection:
        # get all friends, then send msg one by one
        # all_friends = self.get_all_friends()
        page_num = 0
        is_last = False
        i = 0
        while not is_last:
            rp = requests.get(
                self.uranus_sdk.get_allusers_url
                + '?token={}&page_num={}&per_page=15'.format(
                    self.uranus_sdk.token, page_num))
            rp = rp.json()
            if rp['status'] == 'success':
                if rp['data'] is not None:
                    for u in rp['data']:
                        i += 1
                        user = UranusUserCard()
                        user.load_from_dict(u)
                        # logging.info('broadcasting: {} {}'.format(user.user_addr, user.user_nick_name))
                        self.uranus_sdk.send_msg(user.user_addr, msg,
                                                 self.ws_conn)
                    page_num += 1
                else:
                    is_last = True
            else:
                # stop paging on a failed response instead of looping forever
                logging.error('broadcast_txt_msg got bad response: {}'.format(rp))
                is_last = True
        logging.info('Broadcast finished.')
        # "Broadcast finished... the message was pushed to {} users"
        self.uranus_sdk.send_msg(
            'usrZK8kZTzEHC',
            '消息广播完毕....消息共推送到了{}位用户'.format(i), self.ws_conn)
    else:
        logging.error('broadcast_txt_msg send failed: no connection.')
def __init__(self, bbox, image_size, mode="xyxy"):
    device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu")
    # only call as_tensor if it isn't a no-op, because it hurts JIT tracing
    if not isinstance(bbox, torch.Tensor) \
            or bbox.dtype != torch.float32 or bbox.device != device:
        bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device)
    if bbox.ndimension() != 2:
        bbox = bbox.squeeze(dim=0)
        logging.warning('bbox expected 2 dims, squeezed to: {}, shape: {}'.format(
            bbox, bbox.size()))
        # raise ValueError(
        #     "bbox should have 2 dimensions, got {}".format(bbox.ndimension())
        # )
    if bbox.size(-1) != 4:
        raise ValueError("last dimension of bbox should have a "
                         "size of 4, got {}".format(bbox.size(-1)))
    if mode not in ("xyxy", "xywh"):
        raise ValueError("mode should be 'xyxy' or 'xywh'")

    self.bbox = bbox
    self.size = image_size  # (image_width, image_height)
    self.mode = mode
    self.extra_fields = {}
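# A small usage sketch for the constructor above (values are illustrative;
# assumes the surrounding maskrcnn-benchmark-style BoxList class, whose
# convert() method is used elsewhere in this repo):
import torch

boxes = torch.tensor([[10., 10., 50., 80.]])         # one box, xyxy
blist = BoxList(boxes, image_size=(100, 100), mode="xyxy")
blist_xywh = blist.convert("xywh")                   # same box as x, y, w, h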
def vis_det_yolo(img_root, label_root):
    logging.info('img root: {}, label root: {}'.format(img_root, label_root))
    txt_files = glob(os.path.join(label_root, '*.txt'))
    for txt_f in txt_files:
        # auto-detect .jpg or .png images
        stem = os.path.splitext(os.path.basename(txt_f))[0]
        img_f = os.path.join(img_root, stem + '.jpg')
        if not os.path.exists(img_f):
            img_f = os.path.join(img_root, stem + '.png')
        if os.path.exists(img_f):
            img = cv2.imread(img_f)
            h, w, _ = img.shape
            with open(txt_f) as f:
                annos = f.readlines()
            for ann in annos:
                ann = ann.strip().split(' ')
                category = ann[0]
                # YOLO stores normalized center/size; recover pixel corners
                x = float(ann[1]) * w
                y = float(ann[2]) * h
                bw = float(ann[3]) * w
                bh = float(ann[4]) * h
                xmin = int(x - bw / 2)
                ymin = int(y - bh / 2)
                xmax = int(x + bw / 2)
                ymax = int(y + bh / 2)
                print(xmin, ymin, xmax, ymax, category)
                cv2.putText(img, category, (xmin, ymin),
                            cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 255, 255))
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2, 1)
            cv2.imshow('yolo check', img)
            cv2.waitKey(0)
        else:
            logging.warning('xxxx image: {} not found.'.format(img_f))
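# The YOLO label format assumed above, one object per line:
#   <class_id> <cx> <cy> <w> <h>, all normalized to [0, 1].
# For a 640x480 image, the line "0 0.5 0.5 0.25 0.5" decodes to:
w, h = 640, 480
cx, cy, bw, bh = 0.5 * w, 0.5 * h, 0.25 * w, 0.5 * h
xmin, ymin = int(cx - bw / 2), int(cy - bh / 2)   # (240, 120)
xmax, ymax = int(cx + bw / 2), int(cy + bh / 2)   # (400, 360)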
def __getitem__(self, idx):
    img, anno = super(COCODataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    if anno and "segmentation" in anno[0]:
        logging.info('segmentation annotations found, building masks.')
        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def __init__(
    self,
    cfg,
    confidence_threshold=0.7,
    show_mask_heatmaps=False,
    masks_per_dim=2,
    min_image_size=224,
):
    self.cfg = cfg.clone()
    self.model = build_detection_model(cfg)
    self.model.eval()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.model.to(self.device)
    self.min_image_size = min_image_size

    save_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=save_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)
    logging.info('model loaded from: {}'.format(cfg.MODEL.WEIGHT))

    self.transforms = self.build_transform()

    mask_threshold = -1 if show_mask_heatmaps else 0.5
    self.masker = Masker(threshold=mask_threshold, padding=1)

    # used to make colors for each class
    self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])

    self.cpu_device = torch.device("cpu")
    self.confidence_threshold = confidence_threshold
    self.show_mask_heatmaps = show_mask_heatmaps
    self.masks_per_dim = masks_per_dim
def __init__(self, txt_path, preproc=None):
    self.preproc = preproc
    self.imgs_path = []
    self.words = []
    with open(txt_path, 'r') as f:
        lines = f.readlines()
    isFirst = True
    labels = []
    for line in lines:
        line = line.rstrip()
        if line.startswith('#'):
            # a '#' line starts a new image record; flush labels of the previous one
            if isFirst:
                isFirst = False
            else:
                self.words.append(labels.copy())
                labels.clear()
            path = line[2:]
            path = txt_path.replace('label.txt', 'images/') + path
            self.imgs_path.append(path)
        else:
            line = line.split(' ')
            label = [float(x) for x in line]
            labels.append(label)
    logging.info('some img path to check: {}'.format(self.imgs_path[0]))
    # flush labels of the last image
    self.words.append(labels)
def __init__(self, imgs_root, labels_root, infer_func, model=None,
             prep_func=None, posp_func=None, conf_thr=0.4, iou_thr=0.5):
    assert os.path.exists(imgs_root)
    assert os.path.exists(labels_root)

    self.infer_func = infer_func
    self.model_ = model
    self.prep_func = prep_func
    self.posp_func = posp_func
    self.conf_thr = conf_thr
    self.iou_thr = iou_thr

    self._data_root = imgs_root
    logger.info('data_root: {}'.format(self._data_root))

    self.img_files = []
    self.label_files = []
    self.load_combined_imgs_and_labels(imgs_root, labels_root)

    self.hold_vis = True
    logger.info('Press space to vis image, press q to skip and continue eval.')
def login(self, user_acc, user_password):
    login_url = "http://{}:9000/api/v1/users_login".format(self.base_url)
    data = {"user_acc": user_acc, "user_password": user_password}
    rp = requests.post(login_url, data=data)
    if rp.ok:
        rp = rp.json()
        if rp['status'] == 'success':
            token = rp["data"]["token"]
            user_addr = rp["data"]["user_addr"]
            user_nick_name = rp["data"]["user_nick_name"]
            u = {
                'token': token,
                'user_addr': user_addr,
                'user_acc': user_acc,
                'user_nick_name': user_nick_name
            }
            # with open(self.token_store_f, 'wb') as f:
            #     pickle.dump(u, f)
            self.is_login = True
            self.token = token
            self.user_addr = user_addr
            self.user_acc = user_acc
            self.user_nick_name = user_nick_name
            logging.info(
                '[uranuspy] login as: {}, userAcc: {}, userAddr: {}'.format(
                    user_nick_name, user_acc, user_addr))
        else:
            print('login failed.')
            exit()
    else:
        print('server did not respond.')
        exit()
def vis_voc(img_root, label_root):
    logging.info('img root: {}, label root: {}'.format(img_root, label_root))
    # auto-detect .jpg or .png images
    img_files = glob(os.path.join(img_root, '*.[jp][pn]g'))
    for img_f in img_files:
        if os.path.exists(img_f):
            img = cv2.imread(img_f)
            label_path = os.path.join(
                label_root, os.path.basename(img_f).split('.')[0] + '.xml')
            if os.path.exists(label_path):
                tree = ET.parse(label_path)
                root = tree.getroot()
                for obj in get(root, 'object'):
                    category = get_and_check(obj, 'name', 1).text
                    bndbox = get_and_check(obj, 'bndbox', 1)
                    xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
                    ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
                    xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
                    ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
                    cv2.putText(img, category, (xmin, ymin),
                                cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 255, 255))
                    cv2.rectangle(img, (xmin, ymin), (xmax, ymax),
                                  (0, 255, 0), 2, 1)
                cv2.imshow('voc check', img)
                cv2.waitKey(0)
            else:
                logging.warning(
                    'xxxx image: {} corresponding label: {} not found.'.format(
                        img_f, label_path))
def baidu_post(self, text):
    url = ('https://aip.baidubce.com/rpc/2.0/unit/bot/chat?access_token='
           + self.access_token)
    # build the payload with the actual query text (the original hardcoded
    # the query "你好" and never used the `text` argument); requires `import json`
    post_data = json.dumps({
        "log_id": "UNITTEST_10000",
        "version": "2.0",
        "service_id": "S20258",
        "session_id": "",
        "request": {"query": text, "user_id": "88888"},
        "dialog_state": {"contexts": {"SYS_REMEMBERED_SKILLS": ["1057"]}},
    }, ensure_ascii=False)
    request = requests.post(url, post_data.encode('utf-8'),
                            headers={'Content-Type': 'application/json'})
    logging.info(request.json())
    return request.json()
def select_top_predictions(self, predictions):
    scores = predictions.get_field("scores")
    logging.info('scores: {}'.format(scores))
    # keep only detections above the confidence threshold
    keep = torch.nonzero(scores > self.confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    # sort the remaining detections by descending score
    scores = predictions.get_field("scores")
    _, idx = scores.sort(0, descending=True)
    return predictions[idx]
def build_upsample_layers(cfg):
    if cfg.MODEL.CENTERNET.USE_DCN:
        from .networks.head.centernet_deconv_dc import CenternetDeconv
        logging.info('build model with DCN support.')
    else:
        from .networks.head.centernet_deconv import CenternetDeconv
    upsample = CenternetDeconv(cfg)
    return upsample
def check_shape_resize_if_possible(imgs):
    shapes = [i.shape for i in imgs]
    if len(set(shapes)) == 1:
        return imgs
    else:
        logging.info('detected images with unequal shapes, resizing all to the first shape...')
        # cv2.resize expects dsize as (width, height); shape is (height, width, channels)
        imgs = [cv2.resize(i, (shapes[0][1], shapes[0][0])) for i in imgs]
        return imgs
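# Quick check of the resize helper (synthetic images, shapes arbitrary;
# assumes this module's cv2 import):
import numpy as np

a = np.zeros((480, 640, 3), dtype=np.uint8)
b = np.zeros((240, 320, 3), dtype=np.uint8)
out = check_shape_resize_if_possible([a, b])
assert all(o.shape == (480, 640, 3) for o in out)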
def build_centerface_train_loader(cfg, mapper=None):
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers
    )
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers
    )
    images_per_worker = images_per_batch // num_workers

    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)

    # Bin edges for batching images with similar aspect ratios. If ASPECT_RATIO_GROUPING
    # is enabled, we define two bins with an edge at height / width = 1.
    group_bin_edges = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []
    aspect_ratios = [float(img["height"]) / float(img["width"]) for img in dataset]

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    batch_sampler = build_batch_data_sampler(
        sampler, images_per_worker, group_bin_edges, aspect_ratios
    )

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
        worker_init_fn=worker_init_reset_seed,
    )
    return data_loader
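# For reference, the collate function passed to the DataLoader above is
# trivial in the detectron2-style codebase this is derived from: it returns
# the batch list unchanged so the model consumes per-image dicts directly.
# (A sketch; the actual helper lives elsewhere in the repo.)
def trivial_batch_collator(batch):
    return batch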
def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    if cfg.MODEL.BACKBONE == 'shufflenets':
        backbone = ShuffleNetV2Backbone(cfg, input_shape)
    else:
        backbone = ResnetBackbone(cfg, input_shape)
    logging.info('using backbone: {}'.format(cfg.MODEL.BACKBONE))
    assert isinstance(backbone, Backbone)
    return backbone
def run(generator, args, anchor_params, draw_paras):
    """
    Main loop, displaying images one at a time.

    Args:
        generator: The generator to debug.
        args: parseargs args object.
        anchor_params: callable producing anchors for an image.
        draw_paras (dict): flags controlling what gets drawn.
    """
    for i, data in enumerate(generator):
        # load the data
        image = np.transpose(data['img'].numpy(), (0, 2, 3, 1))[0]
        annotations = data['annot'][0].numpy()
        if len(annotations) > 0:
            anchors = anchor_params(image).numpy()[0]
            logging.info('anchors: {}'.format(anchors))
            # best 1 anchor per annotation at IoU threshold 0.1
            positive_indices, _, max_indices = compute_gt_annotations(
                anchors, annotations[:, :4], 0.1)

            if draw_paras['no_gui']:
                image = (image * 255).astype("uint8")

            # draw anchors on the image
            if draw_paras['anchors']:
                print("length of anchors: ", len(positive_indices),
                      anchors.shape, image.shape)
                print(len(anchors[positive_indices]))
                image = draw_boxes(image, anchors[positive_indices],
                                   (255, 255, 0), thickness=2)

            # draw annotations on the image
            if draw_paras['annotations']:
                # draw annotations in red
                draw_annotations(image, annotations, color=(0, 0, 255),
                                 label_to_name=None)
                # draw regressed anchors in green to override most red annotations;
                # result is that annotations without anchors are red, with anchors are green
                draw_boxes(image,
                           annotations[:, :4][max_indices[positive_indices], :],
                           (0, 255, 0))

        # write to file and advance if no-gui selected
        if draw_paras['no_gui']:
            output_path = make_output_path(args.output_dir, str(i) + ".jpg",
                                           flatten=args.flatten_output)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            cv2.imwrite(output_path, image)
            if args.limit and i + 1 >= args.limit:
                return True
            continue

        # if we are using the GUI, then show an image
        cv2.imshow('Image', image)
        print("Image id: ", i)
        key = cv2.waitKeyEx()
        if (key == ord('q')) or (key == 27):
            cv2.destroyAllWindows()
            return False
    return True
def run_on_opencv_image(self, image):
    predictions = self.compute_prediction(image)
    top_predictions = self.select_top_predictions(predictions)
    logging.info('top predictions: {}'.format(top_predictions))

    result = image.copy()
    if self.show_mask_heatmaps:
        return self.create_mask_montage(result, top_predictions)
    result = self.overlay_boxes(result, top_predictions)
    if self.cfg.MODEL.MASK_ON:
        result = self.overlay_mask(result, top_predictions)
    result = self.overlay_class_names(result, top_predictions)
    return result
def inference_onnx(self, aligned_img):
    logging.info('onnx in image shape: {}'.format(aligned_img.shape))
    features = self.backbone(aligned_img)
    up_fmap = self.upsample(features)
    pred_dict = self.head(up_fmap)
    if self.cfg.MODEL.ONNX_POSTPROCESS:
        results = self.decode_prediction_onnx(pred_dict)
        return results
    else:
        # return fmp, wh, reg, consistent with the original CenterNet ONNX export;
        # TensorRT can do the rest (decoding)
        return pred_dict['cls'], pred_dict['wh'], pred_dict['reg']
def text_to_pinyin_sequence(self, text):
    # pinyin = self.processor.pinyin_parser(text, style=Style.TONE3, errors="ignore")
    pinyin, text = self.tts_py.get_pyin(text)
    new_pinyin = []
    for x in str(pinyin).split(" "):
        if "#" not in x:
            new_pinyin.append(x)
    phonemes = self.processor.get_phoneme_from_char_and_pinyin(text, new_pinyin)
    text = " ".join(phonemes)
    print("phoneme seq: {}".format(text))
    logging.info("[TTSModel] [text_to_pinyin_sequence] phoneme seq: {}".format(text))
    input_ids = self.processor.text_to_sequence(text, inference=False)
    return input_ids
def get_model():
    # init model
    model = build_net_003((64, 64, 1), num_classes)
    logging.info('model loaded.')

    latest_ckpt = tf.train.latest_checkpoint(os.path.dirname(ckpt_path))
    if latest_ckpt:
        start_epoch = int(latest_ckpt.split('-')[1].split('.')[0])
        model.load_weights(latest_ckpt)
        logging.info('model resumed from: {} at epoch: {}'.format(
            latest_ckpt, start_epoch))
        return model
    else:
        logging.error('cannot find any checkpoint matching: {}'.format(ckpt_path))
        return None
def train(args):
    config.merge_from_list(args.opts)
    cfg = config
    model = build_model(cfg)

    if not os.path.exists(cfg.OUTPUT_DIR):
        os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    logger.info('output will be saved into: {}'.format(cfg.OUTPUT_DIR))

    trainer = Trainer(cfg, model)
    trainer.resume_or_load(resume=args.resume)

    if cfg.TEST.AUG.ENABLED:
        trainer.register_hooks(
            [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
        )
    return trainer.train()
def act(self, from_talk=None, talk_to=None, msg_executor=None,
        session_hold_bundle=None):
    session_label = None
    params_dict = None
    talk_to_uid = talk_to
    if isinstance(talk_to, dict):
        talk_to_uid = talk_to['user_addr']
        talk_to = talk_to['user_nick_name']
    if session_hold_bundle is not None:
        session_label = session_hold_bundle['session_label']
        params_dict = session_hold_bundle['params_dict']

    # everything after the command word is the text to translate
    s = ' '.join(from_talk.split(' ')[1:])
    logging.info(s)
    return youdao_translate(s)
def act(self, from_talk=None, talk_to=None, msg_executor=None,
        session_hold_bundle=None):
    session_label = None
    params_dict = None
    if session_hold_bundle is not None:
        session_label = session_hold_bundle['session_label']
        params_dict = session_hold_bundle['params_dict']

    if session_label == 'ask_gitlab':
        # resuming a held session: the whole message is the username
        talk_to_uid = talk_to
        username = from_talk
        if talk_to_uid == 'usrZK8kZTzEHC' or talk_to_uid == 'usrItug3Lj2c5':
            is_mana = 'mana' in from_talk
            rp = self.add_new_member_to_vip_mana(username, is_mana=is_mana)
            return rp
        else:
            # "Oops, you found a hidden feature, but this is a highly
            # privileged operation and you don't have permission yet."
            return '糟糕,被你发现了隐藏的功能,可这是一个高度机密操作,您的权限还不够'
    else:
        talk_to_uid = talk_to
        if isinstance(talk_to, dict):
            talk_to_uid = talk_to['user_addr']
            talk_to = talk_to['user_nick_name']
        # expected commands, e.g.:
        #   添加mana会员 jintian       (add "jintian" as a mana member)
        #   添加strangeai会员 jintian  (add "jintian" as a strangeai member)
        if len(from_talk.split(' ')) > 1:
            username = from_talk.split(' ')[-1]
        else:
            username = None
        logging.info('username: {}'.format(username))
        if username == '' or not username:
            session_holder.hold(talk_to_uid=talk_to_uid,
                                session_label='ask_gitlab',
                                func_path='GitlabAdder.act',
                                params_dict={})
            # "Please tell me the gitlab username you want to activate."
            return '请告诉我你要开通的gitlab用户名'
        else:
            if talk_to_uid == 'usrZK8kZTzEHC' or talk_to_uid == 'usrItug3Lj2c5':
                is_mana = 'mana' in from_talk
                rp = self.add_new_member_to_vip_mana(username, is_mana=is_mana)
                return rp
            else:
                return '糟糕,被你发现了隐藏的功能,可这是一个高度机密操作,您的权限还不够'
def vis_coco(coco_img_root, ann_f):
    data_dir = coco_img_root
    coco = COCO(ann_f)
    cats = coco.loadCats(coco.getCatIds())
    logging.info('cats: {}'.format(cats))
    img_ids = coco.getImgIds()
    logging.info('all images we got: {}'.format(len(img_ids)))

    # draw instances
    for img_id in img_ids:
        img = coco.loadImgs(img_id)[0]
        print('checking img: {}, id: {}'.format(img, img_id))
        # img['file_name'] may not be a basename
        img_f = os.path.join(data_dir, os.path.basename(img['file_name']))
        if not os.path.exists(img_f):
            # if not, fall back to the path as given
            img_f = os.path.join(data_dir, img['file_name'])
        anno_ids = coco.getAnnIds(imgIds=img['id'])
        annos = coco.loadAnns(anno_ids)
        logging.info('showing anno: {}'.format(annos))
        if not annos:
            # nothing to draw for this image
            continue
        if len(annos[0]['segmentation']) == 0:
            logging.info('no segmentation found, using opencv vis.')
            img = cv2.imread(img_f)
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 0.36
            font_thickness = 1
            line_thickness = 1
            for ann in annos:
                b = ann['bbox']
                x1 = int(b[0])
                y1 = int(b[1])
                x2 = int(x1 + b[2])
                y2 = int(y1 + b[3])
                cls_id = ann['category_id']
                unique_color = get_unique_color_by_id(cls_id)
                cv2.rectangle(img, (x1, y1), (x2, y2), unique_color,
                              line_thickness, cv2.LINE_AA)
                text_label = '{}'.format(cls_id)
                (ret_val, _) = cv2.getTextSize(text_label, font, font_scale,
                                               font_thickness)
                txt_bottom_left = (x1 + 4, y1 - 4)
                cv2.rectangle(
                    img,
                    (txt_bottom_left[0] - 4, txt_bottom_left[1] - ret_val[1] - 2),
                    (txt_bottom_left[0] + ret_val[0] + 2, txt_bottom_left[1] + 4),
                    (0, 0, 0), -1)
                cv2.putText(img, text_label, txt_bottom_left, font, font_scale,
                            (237, 237, 237), font_thickness, cv2.LINE_AA)
            cv2.imshow('rr', img)
            cv2.waitKey(0)
        else:
            I = io.imread(img_f)
            plt.imshow(I)
            plt.axis('off')
            coco.showAnns(annos, True)
            plt.show()
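# get_unique_color_by_id is defined elsewhere in the repo; a minimal sketch of
# the idea (a deterministic per-class BGR color -- the exact scheme here is an
# assumption, not the repo's actual implementation):
def _unique_color_sketch(cls_id):
    import random
    rng = random.Random(cls_id)  # seed by class id so the color is stable
    return tuple(rng.randint(0, 255) for _ in range(3))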
def coco2yolo(img_r, j_f):
    data_dir = img_r
    coco = COCO(j_f)
    cats = coco.loadCats(coco.getCatIds())
    logging.info('cats: {}'.format(cats))
    print('cls list for yolo\n')
    for i in range(len(cats)):
        print(cats[i]['name'])
    print('\n')
    print('all {} categories.'.format(len(cats)))

    img_ids = coco.getImgIds()
    target_txt_r = os.path.join(os.path.dirname(img_r), 'yolo', 'labels')
    target_img_r = os.path.join(os.path.dirname(img_r), 'yolo', 'images')
    os.makedirs(target_txt_r, exist_ok=True)
    os.makedirs(target_img_r, exist_ok=True)

    print('converting, this is going to take a while...')
    for img_id in img_ids:
        img = coco.loadImgs(img_id)[0]
        # img['file_name'] may not be a basename
        img_f = os.path.join(data_dir, os.path.basename(img['file_name']))
        if not os.path.exists(img_f):
            # if not, fall back to the path as given
            img_f = os.path.join(data_dir, img['file_name'])
        anno_ids = coco.getAnnIds(imgIds=img['id'])
        annos = coco.loadAnns(anno_ids)
        out_file = open(
            os.path.join(target_txt_r,
                         os.path.basename(img_f).split('.')[0] + '.txt'), 'w')
        img = cv2.imread(img_f)
        h, w, _ = img.shape
        shutil.copy(img_f, os.path.join(target_img_r, os.path.basename(img_f)))
        for ann in annos:
            b = ann['bbox']
            x1 = int(b[0])
            y1 = int(b[1])
            x2 = int(x1 + b[2])
            y2 = int(y1 + b[3])
            # note: the raw COCO category_id is written; remap it if your
            # YOLO class list expects contiguous ids starting at 0
            cls_id = ann['category_id']
            b = [x1, x2, y1, y2]  # convert() expects (xmin, xmax, ymin, ymax)
            bb = convert((w, h), b)
            out_file.write(
                str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
        out_file.close()
    print('convert to yolo done!')
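# convert() above is the usual VOC-to-YOLO box normalizer; a sketch under that
# assumption (size is (w, h), box is (xmin, xmax, ymin, ymax)) -- the repo's
# own helper may differ in detail:
def convert(size, box):
    dw, dh = 1. / size[0], 1. / size[1]
    cx = (box[0] + box[1]) / 2.0 * dw   # normalized center x
    cy = (box[2] + box[3]) / 2.0 * dh   # normalized center y
    bw = (box[1] - box[0]) * dw         # normalized width
    bh = (box[3] - box[2]) * dh         # normalized height
    return cx, cy, bw, bh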
def do_synthesis(self, input_text):
    input_text = self.tts_pause.add_pause(input_text)
    print("input_text>>>>", input_text)
    logging.info("[TTSModel] [do_synthesis] input_text: {}".format(input_text))
    input_ids = self.processor.text_to_sequence(input_text, inference=True)
    _, mel_outputs, stop_token_prediction, alignment_history = self.tacotron2.inference(
        tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        tf.convert_to_tensor([len(input_ids)], tf.int32),
        tf.convert_to_tensor([0], dtype=tf.int32))
    # trim the trailing samples of the vocoder output
    remove_end = 1024
    audio = self.mb_melgan.inference(mel_outputs)[0, :-remove_end, 0]
    return mel_outputs.numpy(), alignment_history.numpy(), audio.numpy()
def load_proposals_into_dataset(dataset_dicts, proposal_file):
    """
    Load precomputed object proposals into the dataset.

    The proposal file should be a pickled dict with the following keys:
    - "ids": list[int] or list[str], the image ids
    - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding
      to the image id
    - "objectness_logits": list[np.ndarray], each is an N sized array of
      objectness scores corresponding to the boxes.
    - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.

    Args:
        dataset_dicts (list[dict]): annotations in dl_lib Dataset format.
        proposal_file (str): file path of pre-computed proposals, in pkl format.

    Returns:
        list[dict]: the same format as dataset_dicts, but added proposal field.
    """
    logger.info("Loading proposals from: {}".format(proposal_file))

    with open(proposal_file, "rb") as f:
        proposals = pickle.load(f, encoding="latin1")

    # Rename the key names in D1 proposal files
    rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
    for key in rename_keys:
        if key in proposals:
            proposals[rename_keys[key]] = proposals.pop(key)

    # Fetch the indexes of all proposals that are in the dataset
    # Convert image_id to str since they could be int.
    img_ids = set({str(record["image_id"]) for record in dataset_dicts})
    id_to_index = {
        str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids
    }

    # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
    bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS

    for record in dataset_dicts:
        # Get the index of the proposal
        i = id_to_index[str(record["image_id"])]

        boxes = proposals["boxes"][i]
        objectness_logits = proposals["objectness_logits"][i]
        # Sort the proposals in descending order of the scores
        inds = objectness_logits.argsort()[::-1]
        record["proposal_boxes"] = boxes[inds]
        record["proposal_objectness_logits"] = objectness_logits[inds]
        record["proposal_bbox_mode"] = bbox_mode

    return dataset_dicts
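# A minimal example of the pickled proposal dict this loader expects, matching
# the keys documented in the docstring above (values are synthetic):
import numpy as np
import pickle

proposals = {
    "ids": [1, 2],
    "boxes": [np.random.rand(100, 4), np.random.rand(80, 4)],
    "objectness_logits": [np.random.randn(100), np.random.randn(80)],
    # "bbox_mode" may be omitted; it then defaults to BoxMode.XYXY_ABS
}
with open("proposals.pkl", "wb") as f:
    pickle.dump(proposals, f)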
def youdao_translate(q):
    rp = requests.get(myurl.format(q))
    if rp.ok:
        rp = rp.json()
        if rp['errorCode'] == 0:
            res_list = rp['translateResult']
            res = ''
            logging.info(res_list)
            for i, item in enumerate(res_list):
                # one numbered translation per line
                res += '{}. {}\n'.format(i, item[0]['tgt'])
            return res
        else:
            # "failed to call the cloud translation service"
            return '调取云翻译失败'
    else:
        # "failed to reach the service"
        return '获取服务失败'