def use_small_data_with_simple_textCNN():
    """Run ``simple_textCNN`` on the Toutiao data after ``gen_small_data``.

    The raw file is read with ``read_data_toutiao``, passed through
    ``gen_small_data``, split/encoded by ``preprocess`` and the resulting
    train/test arrays plus vocabulary are handed to ``simple_textCNN``.
    """
    corpus = gen_small_data(
        read_data_toutiao('./datasets/toutiao/toutiao_cat_data.txt'))
    train_x, train_y, test_x, test_y, vocab = preprocess(
        corpus['content'], corpus['label'])
    simple_textCNN(train_x, train_y, test_x, test_y, vocab)
def __call__(self, imgs):
    """Detect objects in ``imgs`` and return boxes, scores and class ids.

    Args:
        imgs: Input forwarded unchanged to ``preprocess``.

    Returns:
        ``(rois, scores, class_ids)`` where each element is a list with one
        entry per detection result, or ``(None, None, None)`` when the
        post-processed output is empty. When ``self.is_xywh`` is set the
        boxes are passed through ``xyxy_to_xywh`` first.
    """
    # Frame preprocessing: only the framed images and their metadata are used.
    _, framed_imgs, framed_metas = preprocess(imgs, max_size=self.input_size)
    frames = [torch.from_numpy(frame) for frame in framed_imgs]
    if self.use_cuda:
        frames = [frame.cuda() for frame in frames]
    target_dtype = torch.float16 if self.use_float16 else torch.float32
    # Move the last axis to position 1 before feeding the model.
    x = torch.stack(frames, 0).to(target_dtype).permute(0, 3, 1, 2)

    # Model prediction.
    with torch.no_grad():
        _features, regression, classification, anchors = self.model(x)
        detections = postprocess(x, anchors, regression, classification,
                                 self.regressBoxes, self.clipBoxes,
                                 self.score_thresh, self.nms_thresh)

    # Map the boxes back to the coordinates of the original images.
    detections = invert_affine(framed_metas, detections)
    if len(detections) == 0:
        return None, None, None

    rois = [det['rois'] for det in detections]
    scores = [det['scores'] for det in detections]
    class_ids = [det['class_ids'] for det in detections]
    if not self.is_xywh:
        return rois, scores, class_ids
    return xyxy_to_xywh(rois), scores, class_ids
def detect(img_path):
    """Run a single CUDA inference on ``img_path`` and report its latency.

    The image is preprocessed, pushed through the module-level ``model``,
    post-processed and handed to ``display``. Timing covers one forward pass
    plus post-processing and display.

    Args:
        img_path: Input forwarded to ``preprocess``.

    Returns:
        The two values returned by ``display``.
    """
    ori_imgs, framed_imgs, framed_metas = preprocess(
        img_path, max_size=input_size)

    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    dtype = torch.float16 if use_float16 else torch.float32
    x = x.to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        start = timeutil.get_epochtime_ms()
        t1 = time.time()
        _features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)
        out = invert_affine(framed_metas, out)
        c1, c2 = display(out, ori_imgs, imshow=True, imwrite=False)
        t2 = time.time()

    # Exactly one inference was timed, so the elapsed time is NOT divided by
    # 10 (the original divisor was left over from a 10-iteration benchmark).
    tact_time = t2 - t1
    fps = 1 / tact_time if tact_time > 0 else float('inf')
    print(f'{tact_time} seconds, {fps} FPS, @batch_size 1')
    # time.time() returns seconds; convert so the value matches the label.
    print('milisecond is ' + str(tact_time * 1000))
    print("Latency: %fms" % (timeutil.get_epochtime_ms() - start))
    return c1, c2
def main():
    """Load the CSV data, build the RNN solver and train or predict with it.

    Which CSV files are read depends on ``DEBUG``. Whether the features are
    preprocessed depends on ``USE_PREPROCESSING``. ``PREDICT_ONLY`` selects
    between ``trainAndTest`` and ``predictByModel`` with a stored model.
    """
    # Import data.
    if DEBUG:
        train_csv = '../data/training-train.csv'
        validation_csv = '../data/training-validate.csv'
    else:
        train_csv = '../data/training.csv'
        validation_csv = '../data/testData.csv'
    data_train = pd.read_csv(train_csv)
    data_validation = pd.read_csv(validation_csv)

    data_train['train_flag'] = True
    data_validation['train_flag'] = False
    data = pd.concat((data_train, data_validation))

    # Keep the missing-value flags for both training and validation.
    missing_cols = slice('COVAR_y1_MISSING', 'COVAR_y3_MISSING')
    ytr_missing = np.array(data_train.loc[:, missing_cols])
    yvl_missing = np.array(data_validation.loc[:, missing_cols])

    # The per-split frames are no longer needed once concatenated.
    del data_train
    del data_validation

    # Basic formatting.
    Xtr, ytr, Xvl, yvl = utils.format_data(
        data, preprocessing=USE_PREPROCESSING)
    del data

    if USE_PREPROCESSING:
        # True applies PCA, False applies standard normalization.
        use_pca = False
        Xtr, Xvl = utils.preprocess(Xtr, Xvl, use_pca)

    # Create the RNN instance sized from the first sample of each array.
    nn_solver = RNN(
        n_features=len(Xtr[0]),
        n_outputs=len(ytr[0]),
        n_neurons=hidden_size,
        param_update_scheme=param_update_scheme,
        learning_rate=learning_rate,
        activation_rule=activation_rule,
        use_batch_step=USE_BATCH_TRAINING,
        batch_step_size=batch_step_size,
        relu_neg_slope=relu_neg_slope,
        use_dropout_regularization=use_dropout_regularization,
        dropout_threshold=dropout_threshold,
        reg_strenght=reg_strenght,
        use_regularization=use_regularization,
        sgd_shuffle=sgd_shuffle,
    )

    if PREDICT_ONLY:
        predictByModel(nn_solver, Xvl,
                       '../models/DeepNN/model_2016-08-03T15_39_15.mat')
    else:
        trainAndTest(nn_solver, Xtr, ytr, ytr_missing, Xvl, yvl, yvl_missing)
def predict(self, img_path, threshold=0.5):
    """Run detection on ``img_path`` and return what ``self.display`` yields.

    Args:
        img_path: Input forwarded to ``preprocess``.
        threshold: Score threshold; it is also stored in
            ``self.system_dict["params"]["threshold"]``.

    Returns:
        ``(scores, labels, bboxes)`` as returned by ``self.display``.
    """
    params = self.system_dict["params"]
    local = self.system_dict["local"]
    params["threshold"] = threshold

    ori_imgs, framed_imgs, framed_metas = preprocess(
        img_path, max_size=local["input_size"])

    frames = [torch.from_numpy(frame) for frame in framed_imgs]
    if params["use_cuda"]:
        frames = [frame.cuda() for frame in frames]
    dtype = torch.float16 if params["use_float16"] else torch.float32
    x = torch.stack(frames, 0).to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        _features, regression, classification, anchors = local["model"](x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          params["threshold"], params["iou_threshold"])

    # Map boxes back onto the original image coordinates.
    out = invert_affine(framed_metas, out)
    scores, labels, bboxes = self.display(out, ori_imgs,
                                          imshow=False, imwrite=True)
    return scores, labels, bboxes
def main(img_path, base_name, checkpoint_path):
    """Detect objects in ``img_path`` with the checkpoint and save the result.

    Args:
        img_path: Input forwarded to ``preprocess``.
        base_name: Forwarded to ``display``.
        checkpoint_path: Path of the state dict loaded into the backbone.
    """
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float16 if use_float16 else torch.float32)
    x = x.permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    # Load onto the CPU first so a checkpoint saved from a GPU can still be
    # read on a CPU-only host; the model is moved to CUDA below if requested.
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        _features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, base_name, imshow=False, imwrite=True)
def detect_image(self, image_path, use_cuda=False, use_float16=False, threshold=0.2, iou_threshold=0.2):
    """Detect objects in ``image_path`` and save the annotated image.

    Args:
        image_path: Input forwarded to ``preprocess``.
        use_cuda: Move the input tensor to CUDA before the forward pass.
        use_float16: Cast the input to ``torch.float16`` instead of float32.
        threshold: Score threshold forwarded to ``postprocess``.
        iou_threshold: IoU threshold forwarded to ``postprocess``.
    """
    max_size = self.input_sizes[self.compound_coef]

    ori_imgs, framed_imgs, framed_metas = preprocess(image_path,
                                                     max_size=max_size)

    if use_cuda:
        x = torch.stack(
            [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float16 if use_float16 else torch.float32)
    x = x.permute(0, 3, 1, 2)

    # Inference only: do not record an autograd graph for the forward pass.
    with torch.no_grad():
        _features, regression, classification, anchors = self.forward(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    self.__save_image(out, ori_imgs, imwrite=True)
def read_images():
    """Detect objects in every file of ``imgfile_path`` and benchmark each.

    The EfficientDet-D7 backbone is built and its weights are loaded once,
    before the loop, instead of once per image. For every file the detections
    are passed to ``display`` and a 10-iteration speed test is printed.
    """
    # Build the model once; constructing it and reloading the weights for
    # every image was pure repeated work.
    model = EfficientDetBackbone(compound_coef=7,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    # place weight path here; loaded on CPU first so it also works without CUDA
    model.load_state_dict(
        torch.load('weights/efficientdet-d7/efficientdet-d7.pth',
                   map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    dtype = torch.float16 if use_float16 else torch.float32

    for filename in os.listdir(imgfile_path):
        ori_imgs, framed_imgs, framed_metas = preprocess(
            os.path.join(imgfile_path, filename), max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
        x = x.to(dtype).permute(0, 3, 1, 2)

        with torch.no_grad():
            _features, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        display(filename, out, ori_imgs, imshow=False, imwrite=True)

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)
                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)
                out = invert_affine(framed_metas, out)
            t2 = time.time()
            # Average over the 10 timed iterations.
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
def use_total_data_with_textCNN():
    """Run ``textCNN`` on the full Toutiao data with a loaded embedding.

    The data is read with ``read_data_toutiao`` (no ``gen_small_data`` step),
    split/encoded by ``preprocess``, and the embedding matrix is obtained from
    ``get_embedding`` using the resulting vocabulary.
    """
    corpus = read_data_toutiao('./datasets/toutiao/toutiao_cat_data.txt')
    train_x, train_y, test_x, test_y, vocab = preprocess(
        corpus['content'], corpus['label'])
    embedding_matrix = get_embedding(
        './datasets/toutiao/toutiao_cat_w2v.pkl', vocab)
    textCNN(train_x, train_y, test_x, test_y, embedding_matrix, vocab)
def evaluate_coco(img_path, model, threshold=0.05):
    """Run the model over a folder of images and write a submission CSV.

    Args:
        img_path: Directory whose image files (jpg/jpeg/bmp/png/gif) are
            evaluated.
        model: Callable returning ``(features, regression, classification,
            anchors)`` for an input batch.
        threshold: Score threshold forwarded to ``postprocess``.

    Raises:
        Exception: If no image produced any detection, i.e. only the CSV
            header would be written.
    """
    header = "image_id,PredictionString"
    kag_res = [header]
    included_extensions = ('jpg', 'jpeg', 'bmp', 'png', 'gif')
    imgs_files = [os.path.join(img_path, fn)
                  for fn in os.listdir(img_path)
                  if fn.endswith(included_extensions)]

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # A distinct loop name keeps the ``img_path`` parameter intact.
    for img_file in tqdm(imgs_files):
        ori_imgs, framed_imgs, framed_metas = preprocess(
            img_file, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            x = x.half() if use_float16 else x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]
        scores = preds['scores']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            # Strip whatever extension the file has, not only ".jpg", since
            # other image types are accepted above.
            image_id = os.path.splitext(os.path.basename(img_file))[0]
            kag_res.append(
                f"{image_id},{format_prediction_string(rois, scores)}")

    # The list always holds the header row, so "no output" means a length of
    # one; the previous emptiness check could never trigger.
    if len(kag_res) <= 1:
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output
    filepath = '/kaggle/working/submission.csv'
    if os.path.exists(filepath):
        os.remove(filepath)

    with open(filepath, "w") as f:
        f.writelines(line + "\n" for line in kag_res)
def main(argv):
    """Fit per-class means and a covariance on MNIST samples and evaluate.

    Args:
        argv: Command-line arguments; ``utils.parseArgs`` turns them into the
            fraction of each class's training samples to draw.
    """
    C0 = 0
    C1 = 8

    # Read args from command line
    sampleSize = utils.parseArgs(argv)

    # Load the train and test sets from MNIST
    print("Loading datasets from MNIST...")
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Apply preprocessing to the training and test sets
    print("Preprocessing training set...")
    x_train, y_train = utils.preprocess(x_train, y_train, C0, C1)
    print("Preprocessing testing set...")
    x_test, y_test = utils.preprocess(x_test, y_test, C0, C1)

    # Apply feature selection to training set
    print("Applying feature selection...")
    x_train, x_test = utils.featureSelection(x_train, x_test)

    # Split training set by class label (0 or 1 after preprocessing)
    x0_train = []
    x1_train = []
    for index, sample in enumerate(x_train):
        if y_train[index] == 0:
            x0_train.append(sample)
        if y_train[index] == 1:
            x1_train.append(sample)

    # Take random sample of each class of training set
    print("Sampling {}% of training set".format(sampleSize * 100))
    n0 = int(len(x0_train) * sampleSize)
    x0_train_sample = random.sample(x0_train, n0)
    n1 = int(len(x1_train) * sampleSize)
    x1_train_sample = random.sample(x1_train, n1)

    # Use Dr Arodz's code to get MAP estimate
    print(
        "Running Dr Arodz's code to obtain MAP estimates of means and covariance"
    )
    m0, m1, cov = Arodz(x0_train_sample, x1_train_sample)

    # Predict labels for test set
    print("Testing model...")
    labels = predict(m0, m1, cov, x_test)

    # Evaluate label accuracy
    utils.evaluate(labels, y_test)
def single_img_test(img_path, input_size, model, use_cuda=True, use_float16=False):
    """Run the detector on one image and list its detections.

    Args:
        img_path: Path of the image; its last path component (after
            normalising backslashes) is used as the image name.
        input_size: ``max_size`` forwarded to ``preprocess``.
        model: Callable returning ``(features, regression, classification,
            anchors)``.
        use_cuda: Move the input tensor to CUDA.
        use_float16: Cast the input to ``torch.float16``.

    Returns:
        A list of ``[image_name, class_id, score, roi_tuple]`` entries taken
        from the first result in the post-processed output.
    """
    # tf bilinear interpolation is different from any other's, just make do
    threshold = 0.05
    iou_threshold = 0.5

    image_name = img_path.replace('\\', '/').split('/')[-1]

    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    frames = [torch.from_numpy(frame) for frame in framed_imgs]
    if use_cuda:
        frames = [frame.cuda() for frame in frames]
    dtype = torch.float16 if use_float16 else torch.float32
    x = torch.stack(frames, 0).to(dtype).permute(0, 3, 1, 2)

    with torch.no_grad():
        _features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)

    det = []
    for i in range(len(out[0]['class_ids'])):
        entry = [
            image_name,
            out[0]['class_ids'][i],
            out[0]['scores'][i],
            tuple(out[0]['rois'][i]),
        ]
        det.append(entry)
    return det
def train(loader, model, epochs=5, batch_size=2, show_loss=False, augmenter=None, lr=None, init_lr=2e-4,
          saver=None, variables_to_optimize=None, evaluation=True,
          name_best_model='/root/Ev-SegNet-old/weights/model/best', preprocess_mode=None):
    """Train ``model`` for ``epochs`` epochs, checkpointing with ``saver``.

    Each step draws a batch from ``loader``, computes the main and auxiliary
    softmax cross-entropy losses (auxiliary weighted by 0.8) and applies the
    gradients through the module-level ``optimizer``. With ``evaluation`` on,
    test metrics are printed every epoch and the model is saved only when the
    test mIoU improves; otherwise the model is saved every epoch.

    Args:
        loader: Data source providing ``image_train_list``, ``get_batch``,
            ``n_classes`` and ``suffle_segmentation``.
        model: Called as ``model(x, training=True, aux_loss=True)``.
        epochs: Number of epochs.
        batch_size: Samples per step.
        show_loss: Print the loss of every step.
        augmenter: Forwarded to ``loader.get_batch``.
        lr: Learning-rate variable updated by ``lr_decay``; ``.numpy()`` is
            called on it, so it must not be left as ``None``.
        init_lr: Initial learning rate passed to ``lr_decay``.
        saver: Object whose ``save(path)`` writes a checkpoint.
        variables_to_optimize: Variables whose gradients are applied.
        evaluation: Whether to evaluate after every epoch.
        name_best_model: Checkpoint path.
        preprocess_mode: Forwarded to ``preprocess`` and ``get_metrics``.
    """
    n_train_samples = len(loader.image_train_list)
    steps_per_epoch = int((n_train_samples / batch_size) + 1)
    best_miou = 0
    final_epoch = epochs - 1

    for epoch in range(epochs):
        # Compute the new learning rate for this epoch.
        lr_decay(lr, init_lr, 1e-9, epoch, final_epoch)
        print('epoch: ' + str(epoch) + '. Learning rate: ' + str(lr.numpy()))

        for _step in range(steps_per_epoch):
            with tf.GradientTape() as tape:
                # Fetch and prepare one batch.
                x, y, mask = loader.get_batch(size=batch_size, train=True,
                                              augmenter=augmenter)
                x = preprocess(x, mode=preprocess_mode)
                x, y, mask = convert_to_tensors([x, y, mask])

                # Forward pass yields the main and the auxiliary output.
                y_, aux_y_ = model(x, training=True, aux_loss=True)

                main_loss = tf.losses.softmax_cross_entropy(
                    y, y_, weights=mask)
                loss_aux = tf.losses.softmax_cross_entropy(
                    y, aux_y_, weights=mask)
                loss = 1 * main_loss + 0.8 * loss_aux
                if show_loss:
                    print('Training loss: ' + str(loss.numpy()))

            # Get the gradients and apply them.
            grads = tape.gradient(loss, variables_to_optimize)
            optimizer.apply_gradients(zip(grads, variables_to_optimize))

        if not evaluation:
            # Without evaluation there is no "best" model: save every epoch.
            saver.save(name_best_model)
        else:
            test_acc, test_miou = get_metrics(
                loader, model, loader.n_classes, train=False,
                flip_inference=False, scales=[1],
                preprocess_mode=preprocess_mode)

            print('Test accuracy: ' + str(test_acc.numpy()))
            print('Test miou: ' + str(test_miou))
            # Printed before the update, so this is the best of earlier epochs.
            print('Best miou: ' + str(best_miou))
            print('')

            # Save the model only if it is better than the best so far.
            if test_miou > best_miou:
                best_miou = test_miou
                saver.save(name_best_model)

        # Shuffle the training set before the next epoch.
        loader.suffle_segmentation()
def get_predictions(self, img_name='', image=None, plot=False):
    """Predict bounding boxes for an image.

    Args:
        img_name: File name inside ``settings.INPUT_FOLDER``; when non-empty
            the image is read from disk with ``cv2.imread``.
        image: Image array used when ``img_name`` is empty.
        plot: Show the detections with ``vis_bbox`` and ``plt.show``.

    Returns:
        A ``torch.FloatTensor`` built from rows of the form
        ``[x1, y1, x2, y2, score]`` where ``score`` is ``conf * cls_conf``.
    """
    if img_name:
        img = cv2.imread(os.path.join(settings.INPUT_FOLDER, img_name))
    else:
        img = image

    # Channel-reversed, channel-first copy kept for plotting.
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))

    # info_img = (h, w, nh, nw, dx, dy)
    img, info_img = preprocess(img, self.imgsize, jitter=0)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    tensor_type = torch.cuda.FloatTensor if use_cuda() else torch.FloatTensor
    img = Variable(img.type(tensor_type))

    with torch.no_grad():
        outputs = self.model(img)
        outputs = postprocess(outputs,
                              Dataset.NUM_CLASSES[Dataset.SIGNET_RING],
                              self.confthre, self.nmsthre)

    bboxes = []
    colors = []
    bboxes_with_scores = []

    detections = outputs[0]
    if detections is not None:
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
            print(int(x1), int(y1), int(x2), int(y2),
                  float(conf), int(cls_pred))
            print('\t+ Conf: %.5f' % cls_conf.item())

            # yolobox2label takes and returns (y, x) ordered corners.
            box = yolobox2label([y1, x1, y2, x2], info_img)
            bboxes.append(box)
            colors.append(BOX_COLOR)
            # Reorder to x1, y1, x2, y2 and append the combined score.
            bboxes_with_scores.append(
                [box[1], box[0], box[3], box[2], conf * cls_conf])

    if plot:
        vis_bbox(img_raw, bboxes, instance_colors=colors, linewidth=2)
        plt.show()

    return torch.FloatTensor(bboxes_with_scores)
def main(argv):
    """Fit ``w`` and ``b`` on a random MNIST training sample and evaluate.

    Args:
        argv: Command-line arguments; ``utils.parseArgs`` turns them into the
            fraction of the training set to sample.
    """
    C0 = 0
    C1 = 8

    # Read args from command line
    sampleSize = utils.parseArgs(argv)

    # Load the train and test sets from MNIST
    print("Loading datasets from MNIST...")
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Apply preprocessing to the training and test sets
    print("Preprocessing training set...")
    x_train, y_train = utils.preprocess(x_train, y_train, C0, C1)
    print("Preprocessing testing set...")
    x_test, y_test = utils.preprocess(x_test, y_test, C0, C1)

    # Sample training set. The indices are kept in a set so each membership
    # test is O(1); testing against the list made this step quadratic.
    sampleIndicies = set(
        random.sample(range(len(x_train)), int(len(x_train) * sampleSize)))
    x_train_sample = [
        sample for i, sample in enumerate(x_train) if i in sampleIndicies
    ]
    y_train_sample = [
        label for i, label in enumerate(y_train) if i in sampleIndicies
    ]

    # Obtain MAP estimates
    print("Running Dr Arodz's code to obtain MAP estimates of w and b")
    w, b = Arodz(x_train_sample, y_train_sample)

    # Predict labels for test set
    print("Testing model...")
    labels = predict(w, b, x_test)

    # Evaluate label accuracy
    utils.evaluate(labels, y_test)
def predict(tomogram, model, preprocess, postprocess, device, ):
    """Segment ``tomogram`` with ``model`` and return the post-processed result.

    Args:
        tomogram: Raw input handed to ``preprocess``.
        model: Network; put in eval mode and called on the input tensor. The
            second element of its output is used as the segmentation map.
        preprocess: Callable returning a NumPy array for ``torch.from_numpy``.
        postprocess: Callable applied to the softmax probabilities.
        device: Device the input tensor is sent to.

    Returns:
        Whatever ``postprocess`` returns.
    """
    model.eval()
    # Preprocess, convert to a tensor and send it to the target device.
    volume = torch.from_numpy(preprocess(tomogram)).to(device)
    with torch.no_grad():
        outputs = model(volume)
        # outputs[1] is the segmentation map; outputs[0] is left unused here.
        probabilities = torch.softmax(outputs[1], dim=1)
    return postprocess(probabilities)
def load_s3_image_and_preprocess(bucket, key):
    """Download an S3 object to a temporary file and run ``preprocess`` on it.

    Args:
        bucket: Name of the S3 bucket.
        key: Object key; its extension becomes the temporary file's suffix.

    Returns:
        The result of ``preprocess``, described by the original author as:
        ori_imgs (list of np.ndarray)
            the original image as a cv2 np array
        framed_imgs (list of np.ndarray)
            a resized and rescaled version of each image in ori_imgs
        framed_metas (list of tuples)
            scaling information per image, with components new_w, new_h,
            old_w, old_h, padding_w, padding_h
    """
    extension = os.path.splitext(key)[1]
    with tempfile.NamedTemporaryFile(suffix=extension) as ntf:
        client = boto3.client('s3')
        response = client.get_object(Bucket=bucket, Key=key)

        # Write the object body through a second handle on the same path.
        with open(ntf.name, 'wb') as fp:
            fp.write(response['Body'].read())

        # Preprocess while the temporary file still exists.
        return preprocess(ntf.name,
                          max_size=MAX_INPUT_SIZE,
                          mean=PARAMS['mean'],
                          std=PARAMS['std'])
def __getitem__(self, idx):
    """Return one sample of the dataset as a dict.

    Args:
        idx: Row position; a tensor index is converted with ``tolist()``.

    Returns:
        ``{'image': ..., 'emotion': ...}`` where ``image`` is the output of
        ``preprocess`` and ``emotion`` is an ``int``.
    """
    if torch.is_tensor(idx):
        idx = idx.tolist()

    # Each row holds the emotion, the image string and a third unused field.
    row = self.data.iloc[idx]
    emotion, pixel_string, _ = row
    emotion = int(emotion)

    # The image is stored as space-separated values in a single string.
    pixels = np.array(pixel_string.split(" "), dtype=np.float32)

    # Preprocess the image (normalization and reshaping per the original note).
    return {'image': preprocess(pixels), 'emotion': emotion}
def get_face_position(fn):
    """Detect boxes in image ``fn`` and return them as lists of ints.

    Args:
        fn: Input forwarded to ``preprocess``.

    Returns:
        One ``[int, ...]`` list per ROI of the first detection result.
    """
    _, framed, meta = preprocess(fn, max_size=effdet_input_size)

    # Single image -> batch of one, with the last axis moved to position 1.
    x = torch.from_numpy(framed[0]).float().unsqueeze(0).permute(0, 3, 1, 2)
    if args.cuda:
        x = x.cuda()

    with torch.no_grad():
        _, reg, clss, anchors = model(x)

        rbox = BBoxTransform()
        cbox = ClipBoxes()
        out = postprocess(x, anchors, reg, clss, rbox, cbox,
                          effdet_thr, effdet_iou_thr)
        out = invert_affine(meta, out)

    rois = out[0]["rois"]
    return [[int(val) for val in rois[i]] for i in range(len(rois))]
def evaluate_coco(img_path, set_name, image_ids, coco, model, threshold=0.05):
    """Run the model on COCO images and dump the detections as JSON.

    Args:
        img_path: Directory containing the images.
        set_name: Prefix of the output file ``{set_name}_bbox_results.json``.
        image_ids: Ids looked up through ``coco.loadImgs``.
        coco: COCO API object providing ``loadImgs``.
        model: Callable returning ``(features, regression, classification,
            anchors)``.
        threshold: Score threshold forwarded to ``postprocess``.

    Raises:
        Exception: If no detection was produced for any image.
    """
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + '/' + image_info['file_name']

        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            x = x.half() if use_float16 else x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            for roi_id in range(rois.shape[0]):
                results.append({
                    'image_id': image_id,
                    # Category ids are written 1-based.
                    'category_id': int(class_ids[roi_id]) + 1,
                    'score': float(scores[roi_id]),
                    'bbox': rois[roi_id, :].tolist(),
                })

    if not len(results):
        raise Exception(
            'the model does not provide any valid output, check model architecture and the data input'
        )

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    # The context manager guarantees the file is flushed and closed; the
    # previous inline open() left the handle to the garbage collector.
    with open(filepath, 'w') as fp:
        json.dump(results, fp, indent=4)
os.makedirs(unknown_path, exist_ok=True) for set_name in sets: img_folder = os.path.join(opt.dataset, set_name) img_ids = [ os.path.join(img_folder, f) for f in filter(lambda file: file.endswith('.bmp'), os.listdir(os.path.join(img_folder))) ] progress_bar = tqdm(range(len(img_ids))) for img_id in img_ids: # tf bilinear interpolation is different from any other's, just make do input_size = config.force_input_size or input_sizes[ config.compound_coef] ori_imgs, framed_imgs, framed_metas = preprocess( img_id, crop_size=params.crop_size, max_size=input_size) if use_cuda: x = torch.stack( [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0) else: x = torch.stack( [torch.from_numpy(fi) for fi in framed_imgs], 0) x = x.to(torch.float32 if not use_float16 else torch.float16 ).permute(0, 3, 1, 2) if use_cuda: model = model.cuda() if use_float16: model = model.half()
obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] # tf bilinear interpolation is different from any other's, just make do input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536] input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size) if use_cuda: x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0) else: x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0) x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2) model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list), ratios=anchor_ratios, scales=anchor_scales) model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth')) model.requires_grad_(False) model.eval() if use_cuda:
model.load_state_dict(torch.load(f'logs/{project}/efficientdet-d{compound_coef}_{number}.pth', map_location='cpu')) model.requires_grad_(False) model.eval() if use_cuda: model = model.cuda() if use_float16: model = model.half() cap = cv2.VideoCapture(video_path) while cap.isOpened(): # load image hasFrames, image = cap.read() ori_imgs, framed_imgs, framed_metas = preprocess(image, max_size=input_size, video = True) if use_cuda: x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0) else: x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0) x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2) with torch.no_grad(): features, regression, classification, anchors = model(x) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification,
def evaluate_coco_show_res_jss(img_path, set_name, image_ids, coco, model, threshold=0.05):
    """Evaluate up to 21 images, save annotated copies and dump the results.

    Detections scoring above 0.2 are drawn on the original image, which is
    written under ``./test_result/zhongchui_d3_epoch200_1124/``. All
    detections are written as JSON to the module-level ``det_save_json``.

    Args:
        img_path: Prefix concatenated directly with each file name (no
            separator is inserted).
        set_name: Unused here; the output path comes from ``det_save_json``.
        image_ids: Ids looked up through ``coco.loadImgs``.
        coco: COCO API object providing ``loadImgs``.
        model: Callable returning ``(features, regression, classification,
            anchors)``.
        threshold: Score threshold forwarded to ``postprocess``.

    Raises:
        Exception: If no detection was produced for any processed image.
    """
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    count = 0
    for image_id in tqdm(image_ids):
        count = count + 1
        # Only the first 21 images are processed.
        if count > 21:
            break
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']
        print('image path:', image_path)

        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            x = x.half() if use_float16 else x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            for roi_id in range(rois.shape[0]):
                score = float(scores[roi_id])
                category_id = int(class_ids[roi_id]) + 1
                box = rois[roi_id, :].tolist()

                image_result = {
                    'image_id': image_id,
                    'category_id': category_id,
                    'score': score,
                    'bbox': box,
                }

                # Pixel coordinates for drawing are truncated to int.
                xmin, ymin = int(box[0]), int(box[1])
                w, h = int(box[2]), int(box[3])
                if score > 0.2:
                    cv2.rectangle(ori_imgs[0], (xmin, ymin),
                                  (xmin + w, ymin + h), (0, 255, 0), 6)
                    cv2.putText(ori_imgs[0],
                                '{}:{:.2f}'.format(category_id, score),
                                (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 4.0,
                                (0, 255, 0), 6)

                results.append(image_result)

        cv2.imwrite(
            './test_result/zhongchui_d3_epoch200_1124/' + 'tmp' +
            '{}'.format(count) + '.jpeg', ori_imgs[0])

    if not len(results):
        raise Exception(
            'the model does not provide any valid output, check model architecture and the data input'
        )

    # write output
    filepath = det_save_json
    if os.path.exists(filepath):
        os.remove(filepath)
    # Close the file deterministically instead of leaking the handle.
    with open(filepath, 'w') as fp:
        json.dump(results, fp, indent=4)
def request_chat(uid: str, text: str) -> dict:
    """Classify the intent of ``text`` with the BERT model and log feedback.

    Args:
        uid: Identifier stored with the feedback record.
        text: Raw user input.

    Returns:
        A dict with the keys ``input``, ``intent``, ``entity``, ``state``,
        ``answer`` and ``score``; ``state`` is always ``'FALLBACK'`` and
        ``entity``/``answer`` are always ``None``.
    """
    # BERT intent classification.
    utterance = preprocess(text)

    max_seq_len = 50
    encoded = tokenizer.encode_plus(utterance,
                                    None,
                                    pad_to_max_length=True,
                                    add_special_tokens=True,
                                    return_attention_mask=True,
                                    max_length=max_seq_len,
                                    )

    def _as_batch(values):
        # Long tensor on the target device with a leading batch axis of 1.
        return torch.tensor(values, dtype=torch.long).to(device).unsqueeze(0)

    # 3. Feed the data into the model.
    model_inputs = {
        'input_ids': _as_batch(encoded["input_ids"]),
        'token_type_ids': _as_batch(encoded["token_type_ids"]),
        'attention_mask': _as_batch(encoded["attention_mask"]),
    }
    # Inference only: skip building an autograd graph on every request.
    with torch.no_grad():
        outputs = model(**model_inputs)

    intent_str, intent_candidate_str, score_str, intent, score = postprocess(
        outputs, model)

    # Store the request in the feedback log.
    feedback['id'].append(uid)
    feedback['text'].append(text)
    feedback['utterance'].append(utterance)
    feedback['intent'].append(intent)
    feedback['score'].append(score)
    feedback['label'].append(999)

    entity = None
    dialogue_cache = {'input': text,
                      'intent': intent_str,
                      'entity': entity,
                      'state': 'FALLBACK',
                      'answer': None,
                      'score': score_str}
    return dialogue_cache
def img_detect(file, img_dir, model, input_size, regressBoxes, clipBoxes, prior_mask, threshold):
    """Run HOI detection on one image file and return the matched detections.

    Args:
        file: Image file name; the integer after its last ``_`` (extension
            removed) is used as the image id.
        img_dir: Directory containing ``file``.
        model: Callable returning the union/instance heads and anchors.
        input_size: ``max_size`` forwarded to ``preprocess``.
        regressBoxes: Box regression helper forwarded to post-processing.
        clipBoxes: Box clipping helper forwarded to post-processing.
        prior_mask: Forwarded to ``hoi_match``.
        threshold: Score threshold for the instance branch.

    Returns:
        The result of ``hoi_match`` for this image.
    """
    fname, ext = os.path.splitext(file)
    image_id = int(fname.split("_")[-1])

    img_path = os.path.join(img_dir, file)
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float16 if use_float16 else torch.float32)
    x = x.permute(0, 3, 1, 2)

    if args.flip_test:
        # Reverse the last axis on whatever device x lives on. The previous
        # index tensor was always created with .cuda(), which crashed when
        # running without CUDA.
        x_flip = torch.flip(x, dims=[-1])
        x_cat = torch.cat([x, x_flip], 0)

    with torch.no_grad():
        if args.flip_test:
            features, union_act_cls, union_sub_reg, union_obj_reg, \
                inst_act_cls, inst_obj_cls, inst_bbox_reg, anchors = model(x_cat)

            # Duplicate the anchors so they cover both halves of the batch.
            anchors = torch.cat([anchors, anchors], 0)
            preds_union = postprocess_dense_union_flip(
                x_cat, anchors, union_act_cls, union_sub_reg, union_obj_reg,
                regressBoxes, clipBoxes, 0.5, 1)
            preds_inst = postprocess_hoi_flip(
                x_cat, anchors, inst_bbox_reg, inst_obj_cls, inst_act_cls,
                regressBoxes, clipBoxes, threshold, nms_threshold,
                mode="object", classwise=True)
        else:
            features, union_act_cls, union_sub_reg, union_obj_reg, \
                inst_act_cls, inst_obj_cls, inst_bbox_reg, anchors = model(x)

            preds_union = postprocess_dense_union(
                x, anchors, union_act_cls, union_sub_reg, union_obj_reg,
                regressBoxes, clipBoxes, 0.5, 1, classwise=True)
            preds_inst = postprocess_hoi(
                x, anchors, inst_bbox_reg, inst_obj_cls, inst_act_cls,
                regressBoxes, clipBoxes, threshold, nms_threshold,
                mode="object", classwise=True)

    preds_inst = invert_affine(framed_metas, preds_inst)[0]
    preds_union = invert_affine(framed_metas, preds_union)[0]

    dets = hoi_match(image_id, preds_inst, preds_union, prior_mask)
    return dets
def evaluate_voc(gt_dict, img_paths, model, max_size, config):
    """Run the detector over ``img_paths`` and score the detections with ``voc_eval``.

    Args:
        gt_dict: Ground-truth structure forwarded unchanged to ``voc_eval``.
        img_paths: Iterable of image paths; the position of a path in this
            iterable is the image index stored with each of its detections.
        model: Callable returning ``(features, regression, classification, anchors)``.
        max_size: Passed to ``preprocess`` as ``max_size``.
        config: Object providing ``eval_use_cuda``, ``eval_gpu``,
            ``eval_use_float16``, ``eval_threshold``, ``eval_nms_threshold``,
            ``anchor_free_mode`` and ``obj_list``.

    Returns:
        A list with one ``[prec, rec, ap]`` entry per index of ``config.obj_list``.

    Raises:
        Exception: If no detection was produced for any image.
    """
    results = []
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for idx, image_path in enumerate(tqdm(img_paths)):
        _, framed_imgs, framed_metas = preprocess(image_path, max_size=max_size)

        x = torch.from_numpy(framed_imgs[0])
        if config.eval_use_cuda:
            x = x.cuda(config.eval_gpu)
            # Half precision is only ever selected on the GPU path.
            x = x.half() if config.eval_use_float16 else x.float()
        else:
            x = x.float()
        # Add a batch axis and move channels first.
        x = x.unsqueeze(0).permute(0, 3, 1, 2)

        # Evaluation never back-propagates; skip autograd bookkeeping.
        with torch.no_grad():
            _, regression, classification, anchors = model(x)
            preds = postprocess(x, anchors, regression, classification,
                                regressBoxes, clipBoxes,
                                config.eval_threshold, config.eval_nms_threshold,
                                anchor_free_mode=config.anchor_free_mode)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]
        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        # Boxes are stored exactly as returned above (no width/height conversion).
        if rois.shape[0] > 0:
            for box, score, label in zip(rois, scores, class_ids):
                results.append(
                    [idx, box[0], box[1], box[2], box[3], float(score), int(label)])

    if not results:
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    voc_certs = []
    for class_idx in range(len(config.obj_list)):
        npos, nd, rec, prec, ap = voc_eval(gt_dict, results, class_idx,
                                           iou_thres=0.5, use_07_metric=False)
        voc_certs.append([prec, rec, ap])
    return voc_certs
# One spaCy pipeline per language; anything not detected as 'vi' uses the English one.
nlp_vi = spacy.load("vi")
nlp_en = spacy.load("en_core_web_sm")

review_data = get_data(review_file)
comment_data = get_data(comment_file)

for txt in review_data:
    try:
        lang = detect(txt)
        nlp = nlp_vi if lang == 'vi' else nlp_en

        # Handle the text paragraph by paragraph, sentence by sentence.
        for paragraph in txt.split("\n"):
            for sent in sent_tokenize(paragraph):
                preprocess_sent = preprocess(sent)
                doc = nlp(preprocess_sent)
                ents = [
                    (ent.text, ent.label_)
                    for ent in doc.ents
                    if hasattr(ent, "label")
                ]
                # Only sentences with at least one entity are written out.
                if ents:
                    write_result(sentence=sent,
                                 preprocess_sentence=preprocess_sent,
                                 entities=ents,
                                 language=lang,
                                 output_file=review_output_file)
    except LangDetectException:
        # Skip this text when a LangDetectException is raised while handling it.
        pass
def detect(model, dataset, args):
    """Write one bounding box per image of ``dataset`` to a text file.

    For every file in ``{dataset}/{dataset}/images``:

    * If ``{dataset}/annotation_byhand/{img_id}.json`` exists, the box is the
      concatenation of the first two points of its first shape.
    * Otherwise (and only when ``args.update`` is falsy) the model is run and
      the largest-area detection with class id 0 is used, falling back to the
      full image when there is none.

    The chosen box is drawn onto the image, optionally saved to
    ``{dataset}/det_vis/{img_id}.jpg`` (when ``args.vis`` is set) and written as
    space-separated values to
    ``{dataset}/{dataset}/annotations/bboxes/{img_id}.txt``.

    Args:
        model: Callable returning ``(features, regression, classification, anchors)``.
        dataset: Dataset root directory (also used as the name of the nested
            directory holding ``images`` and ``annotations``).
        args: Options object providing ``cpu``, ``threshold``, ``iou_threshold``,
            ``compound_coef``, ``update`` and ``vis``.
    """
    use_cuda = not args.cpu
    threshold = args.threshold
    iou_threshold = args.iou_threshold

    # Input resolution selected by the compound coefficient.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[args.compound_coef]

    img_dir = os.path.join(dataset, dataset, 'images')
    bbox_dir = os.path.join(dataset, dataset, 'annotations', 'bboxes')
    vis_dir = os.path.join(dataset, 'det_vis')
    prepare_dirs(bbox_dir, vis_dir)

    # Built once instead of once per image.
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir)]
    for img_path in tqdm(img_paths):
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)
        ori_img = ori_imgs[0]
        img_id = os.path.basename(img_path).split('.')[0]

        json_byhand = os.path.join(dataset, 'annotation_byhand', img_id + '.json')
        if os.path.exists(json_byhand):
            # A hand-made annotation takes precedence over the detector.
            with open(json_byhand) as f:
                annotation_byhand = json.load(f)
            points = annotation_byhand['shapes'][0]['points']
            max_box = points[0] + points[1]
        else:
            if args.update:
                # Update mode: only process annotations made by hand.
                continue

            if use_cuda:
                x = torch.stack(
                    [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                # Fixed: this branch referenced the undefined name `ft`.
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
            x = x.to(torch.float32).permute(0, 3, 1, 2)

            with torch.no_grad():
                features, regression, classification, anchors = model(x)
                preds = postprocess(x, anchors, regression, classification,
                                    regressBoxes, clipBoxes,
                                    threshold, iou_threshold)
            pred = invert_affine(framed_metas, preds)[0]

            # Default to the whole image when no class-0 detection is found.
            max_area, max_box = 0, [0, 0, ori_img.shape[1], ori_img.shape[0]]
            for det, class_id in zip(pred['rois'], pred['class_ids']):
                if class_id != 0:
                    continue
                # `np.int` was removed from NumPy; the builtin is equivalent.
                x1, y1, x2, y2 = det.astype(int)
                area = (x2 - x1) * (y2 - y1)
                if area > max_area:
                    max_area = area
                    max_box = [x1, y1, x2, y2]

        plot_one_box(ori_img, max_box, color=[255, 0, 255], line_thickness=2)
        if args.vis:
            cv2.imwrite(os.path.join(vis_dir, img_id + '.jpg'), ori_img)

        bbox_file = os.path.join(bbox_dir, img_id + '.txt')
        with open(bbox_file, 'w') as f:
            f.write(' '.join(map(str, max_box)))
names = [] for ext in ('*.gif', '*.png', '*.jpg', '*.PNG', '*.JPG'): image_list = glob.glob(img_path + ext) if (len(image_list) == 0): continue last_file = image_list[-1] for image in image_list: images.append(image) names.append(os.path.basename(image)) if len(images) >= args.batch_size or image == last_file: ori_imgs, framed_imgs, framed_metas = preprocess( images, max_size=input_size) if use_cuda: x = torch.stack( [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0) else: x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0) x = x.to( torch.float32 if not use_float16 else torch.float16).permute( 0, 3, 1, 2) with torch.no_grad(): features, regression, classification, anchors = model(x)