def main():
    """Train or evaluate a TSM ResNet-50 model on the Kinetics dataset.

    Behaviour is driven entirely by the module-level ``FLAGS`` namespace:
    ``--eval_only`` switches to evaluation, ``--weights`` loads a checkpoint
    for evaluation, ``--resume`` continues a previous training run.
    """
    device = paddle.set_device(FLAGS.device)
    # Plain statement instead of the original side-effect-only conditional
    # expression (`paddle.disable_static(device) if FLAGS.dynamic else None`).
    if FLAGS.dynamic:
        paddle.disable_static(device)

    train_transform = Compose([
        GroupScale(),
        GroupMultiScaleCrop(),
        GroupRandomCrop(),
        GroupRandomFlip(),
        NormalizeImage(),
    ])
    train_dataset = KineticsDataset(
        file_list=os.path.join(FLAGS.data, 'train_10.list'),
        pickle_dir=os.path.join(FLAGS.data, 'train_10'),
        label_list=os.path.join(FLAGS.data, 'label_list'),
        transform=train_transform)

    val_transform = Compose(
        [GroupScale(), GroupCenterCrop(), NormalizeImage()])
    val_dataset = KineticsDataset(
        file_list=os.path.join(FLAGS.data, 'val_10.list'),
        pickle_dir=os.path.join(FLAGS.data, 'val_10'),
        label_list=os.path.join(FLAGS.data, 'label_list'),
        mode='val',
        transform=val_transform)

    # Fall back to released pretrained weights only when evaluating without
    # an explicit checkpoint.
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = tsm_resnet50(num_classes=train_dataset.num_classes,
                         pretrained=pretrained)

    # Integer floor division replaces int(a / b / c): identical for the
    # non-negative sizes involved and avoids the float round-trip.
    step_per_epoch = (len(train_dataset) // FLAGS.batch_size
                      // ParallelEnv().nranks)
    optim = make_optimizer(step_per_epoch, model.parameters())

    model.prepare(optimizer=optim,
                  loss=paddle.nn.CrossEntropyLoss(),
                  metrics=paddle.metric.Accuracy(topk=(1, 5)))

    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        model.evaluate(val_dataset,
                       batch_size=FLAGS.batch_size,
                       num_workers=FLAGS.num_workers)
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    model.fit(train_data=train_dataset,
              eval_data=val_dataset,
              epochs=FLAGS.epoch,
              batch_size=FLAGS.batch_size,
              save_dir=FLAGS.save_dir or 'tsm_checkpoint',
              num_workers=FLAGS.num_workers,
              drop_last=True,
              shuffle=True)
def build_transform():
    """Build data pipelines for CIFAR training and evaluation.

    Returns:
        tuple: ``(train_transforms, test_transforms)`` — training adds
        random crop, brightness/contrast jitter, flip and rotation on top
        of the array conversion + normalization shared with evaluation.
    """
    mean = [0.5071, 0.4865, 0.4409]
    std = [0.1942, 0.1918, 0.1958]

    train_transforms = Compose([
        RandomCrop(32, padding=4),
        ContrastTransform(0.1),
        BrightnessTransform(0.1),
        RandomHorizontalFlip(),
        RandomRotation(15),
        ToArray(),
        Normalize(mean, std),
    ])
    test_transforms = Compose([ToArray(), Normalize(mean, std)])
    return train_transforms, test_transforms
def __init__(self, output=None, weight_path=None):
    """
    output (str|None): output path; if None, depth maps are not written
        to pfm and png files.
    weight_path (str|None): weight path; if None, the default MiDaSv2.1
        weights are downloaded and used.
    """
    # Only write depth maps out when an output directory was provided.
    self.output_path = os.path.join(output, 'MiDaS') if output else None

    # Network input resolution (height, width).
    self.net_h, self.net_w = 384, 384

    # Fall back to the released MiDaSv2.1 checkpoint when no weights given.
    if weight_path is None:
        default_url = 'https://paddlegan.bj.bcebos.com/applications/midas.pdparams'
        weight_path = get_path_from_url(default_url)
    self.weight_path = weight_path

    self.model = self.load_checkpoints()

    # Preprocessing: aspect-preserving resize to a multiple of 32,
    # ImageNet normalization, then layout preparation for the network.
    self.transform = Compose([
        Resize(
            self.net_w,
            self.net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])
def main():
    """Run single-sample TSM ResNet-50 inference and log the prediction."""
    device = paddle.set_device(FLAGS.device)
    # Plain statement instead of the original side-effect-only conditional
    # expression (`paddle.disable_static(device) if FLAGS.dynamic else None`).
    if FLAGS.dynamic:
        paddle.disable_static(device)

    transform = Compose([GroupScale(), GroupCenterCrop(), NormalizeImage()])
    dataset = KineticsDataset(
        pickle_file=FLAGS.infer_file,
        label_list=FLAGS.label_list,
        mode='test',
        transform=transform)
    labels = dataset.label_list

    # Use released pretrained weights only when no checkpoint is supplied.
    model = tsm_resnet50(
        num_classes=len(labels), pretrained=FLAGS.weights is None)
    model.prepare()

    if FLAGS.weights is not None:
        model.load(FLAGS.weights, reset_optimizer=True)

    imgs, label = dataset[0]
    # Add a leading batch dimension for a single-sample batch.
    pred = model.test_batch([imgs[np.newaxis, :]])
    pred = labels[np.argmax(pred)]
    # Lazy %-style args: the message is only formatted if the record is
    # actually emitted.
    logger.info("Sample %s predict label: %s, ground truth label: %s",
                FLAGS.infer_file, pred, labels[int(label)])
def preprocess(img_path, size):
    """Read an image and turn it into a normalized, network-ready batch.

    Args:
        img_path (str): path of the image to load.
        size (int): target width and height passed to Resize.

    Returns:
        numpy.ndarray: preprocessed image with a leading batch dimension.
    """
    # Image transformation pipeline: resize to a multiple of 32,
    # ImageNet normalization, then layout preparation for the network.
    pipeline = Compose([
        Resize(
            size,
            size,
            resize_target=None,
            keep_aspect_ratio=False,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225]),
        PrepareForNet()
    ])

    # Load the image, convert BGR -> RGB and scale pixels into [0, 1].
    raw = cv2.imread(img_path)
    rgb = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB) / 255.0

    # Apply the pipeline and prepend the batch dimension.
    transformed = pipeline({"image": rgb})["image"]
    return transformed[np.newaxis, ...]
def test_with_dataloader(self):
    """Check the custom op against paddle's relu on real MNIST batches."""
    for device in self.devices:
        paddle.set_device(device)

        # Data loader: MNIST normalized into [-1, 1] in CHW layout.
        transform = Compose(
            [Normalize(
                mean=[127.5], std=[127.5], data_format='CHW')])
        dataset = paddle.vision.datasets.MNIST(
            mode='train', transform=transform)
        loader = paddle.io.DataLoader(
            dataset,
            batch_size=64,
            shuffle=True,
            drop_last=True,
            num_workers=0)

        # Compare a handful of batches, then stop early.
        for batch_id, (image, _) in enumerate(loader()):
            custom_out = self.custom_ops[0](image)
            expected = paddle.nn.functional.relu(image)
            self.assertTrue(
                np.array_equal(custom_out, expected),
                "custom op out: {},\n paddle api out: {}".format(
                    custom_out, expected))
            if batch_id == 5:
                break
def __init__(self, name=None, directory=None, use_gpu=False):
    """Load the packaged inference model and build its preprocessing.

    NOTE(review): `name` and `directory` are unused in this body;
    presumably the hub base class consumes them and sets
    ``self.directory`` — verify before removing.
    """
    # Resolve the packaged model path.
    model_path = os.path.join(self.directory, "model-f6b98070")

    # Load the combined inference model and switch to eval mode.
    self.model = InferenceModel(modelpath=model_path,
                                use_gpu=use_gpu,
                                use_mkldnn=False,
                                combined=True)
    self.model.eval()

    # Preprocessing configuration: fixed 384x384 input, resized to a
    # multiple of 32, ImageNet normalization, network layout prep.
    self.net_h, self.net_w = 384, 384
    self.transform = Compose([
        Resize(
            self.net_w,
            self.net_h,
            resize_target=None,
            keep_aspect_ratio=False,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225]),
        PrepareForNet()
    ])
def __init__(self, cfg, train=True):
    """ImageNet dataset with train/val transform pipelines.

    Args:
        cfg: configuration object providing ``Data`` and ``Trans``
            sub-configs (paths, crop sizes, augmentation switches).
        train (bool): build the training split when True.
    """
    super(ImageNet, self).__init__()
    self.cfg = cfg
    self.train = train
    self.data_infor = get_data(cfg.Data.dataset)
    self.traindir = os.path.join(cfg.Data.Dir, 'train')
    self.valdir = os.path.join(cfg.Data.Dir, 'val')
    # BUG FIX: sort the listing *before* slicing. os.listdir returns
    # entries in arbitrary order, so the original
    # `sorted(os.listdir(...)[:1000])` could map an arbitrary subset of
    # category directories instead of the first 1000 in sorted order.
    self.catedict = dict(
        zip(sorted(os.listdir(self.valdir))[:1000], range(1000)))
    # transform
    # assumes input is CHW after ToTensor
    self.normalize = Normalize(mean=self.data_infor.mean,
                               std=self.data_infor.std,
                               data_format='CHW')
    self.transform_train = [
        RandomResizedCrop(cfg.Trans.crop_size,
                          scale=(cfg.Trans.min_area_ratio, 1.0),
                          ratio=(3. / 4, cfg.Trans.aspect_ratio))
    ]
    # PCA lighting + color jitter only when eigen statistics exist and
    # random color augmentation is enabled.
    if self.data_infor.eigval is not None and self.data_infor.eigvec is not None \
            and cfg.Trans.random_color:
        lighting = Lighting(0.1, self.data_infor.eigval,
                            self.data_infor.eigvec)
        jitter = ColorJitter(0.4, 0.4, 0.4)
        self.transform_train.extend([jitter, lighting])
    self.transform_train.extend(
        [RandomHorizontalFlip(), ToTensor(), self.normalize])
    self.transform_train = Compose(self.transform_train)
    self.transform_val = Compose([
        Resize(cfg.Trans.scale_size),
        CenterCrop(cfg.Trans.crop_size),
        ToTensor(),
        self.normalize
    ])
    self.file_list = self.get_samples()
def build_aug(args):
    """build augmentation transforms

    Args:
        args: data augmentation config (currently unused in this body;
            kept for interface compatibility)
    Return:
        transforms
    """
    normalize = Normalize(mean=[0.5, 0.5, 0.5],
                          std=[0.5, 0.5, 0.5],
                          data_format='CHW')
    return Compose([RandomHorizontalFlip(), ToArray(), normalize])
def predict_class(self, im, key):
    """Predict the grade of a small joint image.

    Args:
        im: 2-D grayscale image array.
        key: joint name selecting the classifier from ``self.arthrosis``.

    Returns:
        Index of the class with the highest score.
    """
    from paddle.vision.transforms import Compose, Resize, Normalize, Transpose

    pipeline = Compose([
        Resize(size=(224, 224)),
        Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5],
                  data_format='HWC'),
        Transpose()
    ])

    classifier = self.arthrosis[key][2]
    classifier.eval()

    # Add a channel axis, preprocess, then add the batch axis.
    sample = np.expand_dims(im, 2)
    sample = pipeline(sample)
    sample = np.expand_dims(sample, 0)
    batch = paddle.to_tensor(sample, dtype='float32')

    # Forward pass; the prediction is the argmax over class scores.
    scores = classifier(batch)[0]
    return np.argmax(scores.numpy())
def train():
    """Training smoke-run: feed CIFAR-10 through resnet50, print means."""
    paddle.set_device('gpu')

    model = resnet50()
    paddle.summary(model, (1, 3, 32, 32))

    # HWC -> CHW transpose followed by scaling pixels into [0, 1].
    transform = Compose([
        paddle.vision.transforms.Transpose(),
        paddle.vision.transforms.Normalize(0, 255.),
    ])
    dataset = Cifar10(mode='train', transform=transform)
    loader = paddle.io.DataLoader(dataset,
                                  shuffle=True,
                                  batch_size=BATCH_SIZE,
                                  num_workers=10)

    for epoch in range(EPOCH_NUM):
        for batch_id, data in enumerate(loader()):
            out = paddle.mean(model(data[0]))
            if batch_id % 10 == 0:
                print("Epoch {}: batch {}, out {}".format(
                    epoch, batch_id, out.numpy()))
def main():
    # Train or evaluate YOLOv3-DarkNet53 on COCO; behaviour is driven by
    # the module-level FLAGS namespace.
    device = paddle.set_device(FLAGS.device)
    # NOTE(review): conditional expression used purely for its side effect.
    paddle.disable_static(device) if FLAGS.dynamic else None
    if not FLAGS.eval_only:  # training mode
        # Box-level augmentation per sample; image-level batch transforms
        # (random shape, normalization) are applied in the collate_fn.
        train_transform = Compose([
            ColorDistort(), RandomExpand(), RandomCrop(), RandomFlip(),
            NormalizeBox(), PadBox(), BboxXYXY2XYWH()
        ])
        train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(
            dataset_dir=FLAGS.data,
            anno_path='annotations/instances_train2017.json',
            image_dir='train2017',
            with_background=False,
            mixup=True,
            transform=train_transform)
        batch_sampler = DistributedBatchSampler(
            dataset,
            batch_size=FLAGS.batch_size,
            shuffle=True,
            drop_last=True)
        loader = DataLoader(
            dataset,
            batch_sampler=batch_sampler,
            places=device,
            num_workers=FLAGS.num_workers,
            return_list=True,
            collate_fn=train_collate_fn)
    else:  # evaluation mode
        eval_transform = Compose([
            ResizeImage(target_size=608), NormalizeBox(), PadBox(),
            BboxXYXY2XYWH()
        ])
        eval_collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(
            dataset_dir=FLAGS.data,
            anno_path='annotations/instances_val2017.json',
            image_dir='val2017',
            with_background=False,
            transform=eval_transform)
        # batch_size can only be 1 in evaluation for YOLOv3
        # prediction bbox is a LoDTensor
        batch_sampler = DistributedBatchSampler(
            dataset, batch_size=1, shuffle=False, drop_last=False)
        loader = DataLoader(
            dataset,
            batch_sampler=batch_sampler,
            places=device,
            num_workers=FLAGS.num_workers,
            return_list=True,
            collate_fn=eval_collate_fn)
    # Fall back to released pretrained weights only when evaluating
    # without an explicit checkpoint.
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(
        num_classes=dataset.num_classes,
        num_max_boxes=NUM_MAX_BOXES,
        model_mode='eval' if FLAGS.eval_only else 'train',
        pretrained=pretrained)
    if FLAGS.pretrain_weights and not FLAGS.eval_only:
        model.load(
            FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
    optim = make_optimizer(
        len(batch_sampler), parameter_list=model.parameters())
    model.prepare(
        optimizer=optim, loss=YoloLoss(num_classes=dataset.num_classes))
    # NOTE: we implement COCO metric of YOLOv3 model here, separately
    # from 'prepare' and 'fit' framework for following reasons:
    # 1. YOLOv3 network structure is different between 'train' and
    # 'eval' mode, in 'eval' mode, output prediction bbox is not the
    # feature map used for YoloLoss calculating
    # 2. COCO metric behavior is also different from defined Metric
    # for COCO metric should not perform accumulate in each iteration
    # but only accumulate at the end of an epoch
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
        _, _, _, img_ids, bboxes = preds
        anno_path = os.path.join(FLAGS.data,
                                 'annotations/instances_val2017.json')
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return
    if FLAGS.resume is not None:
        model.load(FLAGS.resume)
    save_dir = FLAGS.save_dir or 'yolo_checkpoint'
    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "mixup"),
              save_freq=10)
    # do not use image mixup transform in the last
    # FLAGS.no_mixup_epoch epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "no_mixup"),
              save_freq=5)
# Configure optimizer (SGD with momentum, cosine-annealed LR with 2000-step
# linear warmup), cross-entropy loss and top-1/top-5 accuracy metrics.
model.prepare(
    paddle.optimizer.Momentum(
        learning_rate=LinearWarmup(
            CosineAnnealingDecay(LR, MAX_EPOCH), 2000, 0., LR),
        momentum=MOMENTUM,
        parameters=model.parameters(),
        weight_decay=WEIGHT_DECAY),
    paddle.nn.CrossEntropyLoss(),
    paddle.metric.Accuracy(topk=(1, 5)))

# Training-time augmentation for CIFAR-100; validation only converts to an
# array and normalizes.
transforms = Compose([
    RandomCrop(32, padding=4),
    RandomApply(BrightnessTransform(0.1)),
    RandomApply(ContrastTransform(0.1)),
    RandomHorizontalFlip(),
    RandomRotation(15),
    ToArray(),
    Normalize(CIFAR_MEAN, CIFAR_STD),
])
val_transforms = Compose([ToArray(), Normalize(CIFAR_MEAN, CIFAR_STD)])
train_set = Cifar100(DATA_FILE, mode='train', transform=transforms)
test_set = Cifar100(DATA_FILE, mode='test', transform=val_transforms)

# LR-scheduler stepping + VisualDL logging callbacks.
# NOTE(review): this rebinding shadows the imported `callbacks` module.
callbacks = [LRSchedulerM(), callbacks.VisualDL(
    'vis_logs/res20_3x3_lr0.1cos_e300_bs128_bri_con_aug')]

# NOTE(review): this call is truncated in the visible chunk.
model.fit(
    train_set,
    test_set,
    epochs=MAX_EPOCH,
    batch_size=BATCH_SIZE,
    shuffle=True,
# Static-graph LeNet training setup on GPU.
paddle.enable_static()
paddle.set_device("gpu")

# model
image = static.data(shape=[None, 1, 28, 28], name='image', dtype='float32')
label = static.data(shape=[None, 1], name='label', dtype='int64')
net = LeNet()
out = net(image)
loss = nn.functional.cross_entropy(out, label)
opt = paddle.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)

# data loader: MNIST normalized into [-1, 1] in CHW layout, fed through
# the static `image`/`label` placeholders.
transform = Compose([Normalize(mean=[127.5], std=[127.5], data_format='CHW')])
train_dataset = paddle.vision.datasets.MNIST(mode='train', transform=transform)
train_loader = paddle.io.DataLoader(train_dataset,
                                    feed_list=[image, label],
                                    batch_size=BATCH_SIZE,
                                    shuffle=True,
                                    drop_last=True,
                                    num_workers=2)

# prepare: run startup program, then compile the main program for
# data-parallel execution over all visible CUDA places.
exe = static.Executor()
exe.run(static.default_startup_program())
places = paddle.static.cuda_places()
# NOTE(review): this statement is truncated in the visible chunk.
compiled_program = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name,
def inference(model, left_imgs, right_ims, LOG):
    # Run the 4-stage stereo model over left/right image pairs, colorize
    # each stage's disparity output and save and/or display it.
    # NOTE(review): relies on a module-level `args` namespace
    # (left_img, vis, save_path) — not passed in explicitly.
    stages = 4
    model.eval()
    # ImageNet-normalized tensor conversion for both views.
    transform = Compose([ToTensor(), Normalize(mean=imagenet_stats["mean"], std=imagenet_stats["std"])])
    for index in range(len(left_imgs)):
        # LOG.info("left = {}\tright = {}".format(left_imgs[index], right_ims[index]))
        left_img = cv2.imread(left_imgs[index], cv2.IMREAD_UNCHANGED)
        right_img = cv2.imread(right_ims[index], cv2.IMREAD_UNCHANGED)
        h, w, c = left_img.shape
        # Fixed crop size expected by the network; skip smaller images.
        th, tw = 368, 1232
        if h<th or w<tw:
            continue
        # Bottom-right crop to (th, tw).
        left_img = left_img[h - th:h, w - tw:w, :]
        right_img = right_img[h - th:h, w - tw:w, :]
        # BGR -> RGB via reversed channel slice, then add batch dimension.
        left_input = transform(left_img[:, :, ::-1]).unsqueeze(axis=0)
        right_input = transform(right_img[:, :, ::-1]).unsqueeze(axis=0)
        with paddle.no_grad():
            start_time = time.time()
            outputs = model(left_input, right_input)
            cost_time = time.time()-start_time
            ss = "Inference 4 stages cost = {:.3f} sec, FPS = {:.1f}".format(cost_time, 1/cost_time)
            for stage in range(stages):
                # Drop batch/channel dims and colorize the disparity map.
                outputs[stage] = outputs[stage].squeeze(axis=[0, 1]).numpy().astype(np.uint8)
                color_disp = cv2.applyColorMap(cv2.convertScaleAbs(outputs[stage], alpha=1, beta=0), cv2.COLORMAP_JET)
                if args.left_img:
                    # Save next to the input image, named by stage number.
                    temp_path = args.left_img.split("/")[0:-1]
                    temp_path = "/".join(temp_path)
                    save_img_path = os.path.join(temp_path, str(stage+1)+".png")
                    cv2.imwrite(save_img_path, color_disp)
                    LOG.info("{}\t\tSave img = {}".format(ss, save_img_path))
                if args.vis:
                    # Show the crop stacked above its colorized disparity.
                    concat_img = np.concatenate((left_img, color_disp), axis=0)
                    # cv2.imshow("left_img", left_img)
                    # cv2.imshow("raw_disp", outputs[stage])
                    # cv2.imshow("color_disp", color_disp)
                    cv2.imshow("concat_img", concat_img)
                    key = cv2.waitKey(0)
                    # NOTE(review): 'q' only breaks the per-stage loop, not
                    # the outer image loop — confirm this is intended.
                    if key == ord("q"):
                        break
            if not args.left_img:
                # Batch mode: save the last stage's colorized disparity
                # under the original image name.
                img_name = left_imgs[index].split("/")[-1]
                save_img_path = os.path.join(args.save_path, img_name)
                cv2.imwrite(save_img_path, color_disp)
                LOG.info("{}\t\tSave img = {}".format(ss, save_img_path))
def __init__(self, num_samples):
    """Dataset stub storing a sample count and its transform pipeline.

    Args:
        num_samples (int): number of samples the dataset reports.
    """
    super(MyDataset, self).__init__()
    self.num_samples = num_samples
    # Data augmentation is defined once in `__init__`; here it only
    # resizes the image to 32 pixels.
    self.transform = Compose([Resize(size=32)])
import paddle
from paddle import nn
from paddle import optimizer
from paddle.vision.transforms import Compose, Normalize
from paddle.vision.transforms import ToTensor
import paddle.distributed as dist
from model_zoo import Model

# Shared preprocessing: tensor conversion + (default) normalization.
# NOTE(review): despite its name, this is a Compose object, not a tuple.
transform_tuple = Compose([ToTensor(), Normalize()])
# Toggle to initialise paddle.distributed before training.
parallel_flag = False

if __name__ == '__main__':
    if parallel_flag:
        dist.init_parallel_env()

    # Train and validate a LeakyReLU + CrossEntropy + Adam variant.
    leakRelu_crossEntropy_adam = Model(transform_tuple, nn.LeakyReLU,
                                       nn.CrossEntropyLoss, optimizer.Adam)
    leakRelu_crossEntropy_adam.train()
    leakRelu_crossEntropy_adam.validate()

    # Train and validate a ReLU + CrossEntropy + SGD variant.
    relu_crossEntropy_sgd = Model(transform_tuple, nn.ReLU,
                                  nn.CrossEntropyLoss, optimizer.SGD)
    relu_crossEntropy_sgd.train()
    relu_crossEntropy_sgd.validate()

    # NOTE(review): this statement is truncated in the visible chunk.
    leakReLuCrossEntropySgd = Model(transform_tuple, nn.LeakyReLU,
class Discriminator(nn.Layer):
    """DCGAN-style discriminator for 32x32 single-channel images.

    Outputs a sigmoid probability that the input image is real.
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        # BUG FIXES: the base class is nn.Layer (capital L), and the
        # Sequential must be *assigned* with '=' — the original used '==',
        # which compared and discarded the module, leaving `self.dis`
        # undefined so forward() would raise AttributeError.
        self.dis = nn.Sequential(
            nn.Conv2D(1, 64, 4, 2, 1, bias_attr=False),
            nn.LeakyReLU(0.2),
            nn.Conv2D(64, 64 * 2, 4, 2, 1, bias_attr=False),
            nn.BatchNorm2D(64 * 2),
            nn.LeakyReLU(0.2),
            nn.Conv2D(64 * 2, 64 * 4, 4, 2, 1, bias_attr=False),
            nn.BatchNorm2D(64 * 4),
            nn.LeakyReLU(0.2),
            nn.Conv2D(64 * 4, 1, 4, 1, 0, bias_attr=False),
            nn.Sigmoid())

    def forward(self, x):
        return self.dis(x)


if __name__ == '__main__':
    # Smoke test: load MNIST resized to 32x32 and normalized to [-1, 1],
    # then print the shape of one batch.
    train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                 transform=Compose([
                                                     Resize(size=(32, 32)),
                                                     Normalize(mean=[127.5],
                                                               std=[127.5])
                                                 ]))
    dataloader = paddle.io.DataLoader(dataset=train_dataset,
                                      batch_size=32,
                                      shuffle=True,
                                      num_workers=4)
    for data in dataloader:
        break
    print(data[0].shape)
# Author: Acer Zhang # Datetime: 2021/2/25 # Copyright belongs to the author. # Please indicate the source for reprinting. import paddle from paddle.vision.transforms import Compose, Resize, ToTensor from paddle.vision.models import resnet50 from paddle.vision.datasets import Cifar100 # 导入RIFLE模块 from paddle_rifle.rifle import RIFLECallback # 定义数据预处理 transform = Compose([Resize(224), ToTensor()]) # 加载Cifar100数据集 train_data = Cifar100(transform=transform) test_data = Cifar100(mode="test", transform=transform) # 加载Resnet50 net = resnet50(True, num_classes=100) # 获取Resnet50的输出层 fc_layer = net.fc """ # 自定义网络场景下的输出层获取示例 class Net(paddle.nn.Layer): def __init__(self): super(Net, self).__init__()
# Supported model names and their released checkpoint URLs.
MODEL_NAMES = ["RN50", "RN101", "VIT"]
URL = {
    "RN50": "https://bj.bcebos.com/paddleaudio/examples/clip/RN50.pdparams",
    "RN101": "https://bj.bcebos.com/paddleaudio/examples/clip/RN101.pdparams",
    # NOTE(review): extension is '.pdparam' (no trailing 's'), unlike the
    # other entries — confirm the URL is correct.
    "VIT": "https://bj.bcebos.com/paddleaudio/examples/clip/ViT-B-32.pdparam",
}

# CLIP image normalization statistics (RGB mean/std).
MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258,
                                                  0.27577711)
_tokenizer = SimpleTokenizer()

# CLIP preprocessing: bicubic resize + center crop to 224, conversion to
# RGB, tensor conversion, normalization, then a leading batch dimension.
transform = Compose([
    Resize(224, interpolation="bicubic"),
    CenterCrop(224),
    lambda image: image.convert("RGB"),
    ToTensor(),
    Normalize(mean=MEAN, std=STD),
    lambda t: t.unsqueeze_(0),
])


def tokenize(texts: Union[str, List[str]], context_length: int = 77):
    # NOTE(review): this function is truncated in the visible chunk.
    """
    Returns the tokenized representation of given input string(s)

    Parameters
    ----------
    texts : Union[str, List[str]]
        An input string or a list of input strings to tokenize
    context_length : int
        The context length to use; all CLIP models use 77 as the context length

    Returns