예제 #1
0
def generator(
        input_size=512,
        batch_size=4,
        train_list='/home/klara/klara/home/DeepSemanticText/resources/ims2.txt',
        vis=False,
        in_train=True,
        geo_type=0):
    """Endlessly yield augmented training batches for text detection.

    Image paths come from ``train_list``.  Each sample is padded, warped,
    scaled, cropped/resized to ``input_size`` and colour-jittered before
    per-pixel targets are produced by ``generate_rbox``/``generate_rbox2``.

    Args:
        input_size: square crop size; -1 keeps a 32-aligned native size
            capped near 1 megapixel.
        batch_size: number of samples per yielded batch.
        train_list: text file listing image paths, one per line.
        vis: forwarded to the target generators for debug visualisation.
        in_train: when True, shuffle and augment forever; when False, run
            one pass and terminate with a final ``yield None``.
        geo_type: 0 selects ``generate_rbox``, otherwise ``generate_rbox2``.

    Yields:
        Tuples ``(images, image_fns, score_maps, geo_maps, training_masks,
        gtso, lbso, gt_idxs)``; images are float arrays normalised to
        roughly [-1, 1).
    """
    image_list = np.array(get_images(train_list))
    print('{} training images in {}'.format(image_list.shape[0], train_list))
    index = np.arange(0, image_list.shape[0])

    # NOTE(review): this initial value is clobbered unconditionally inside
    # the per-image loop below, so it only matters before the first image.
    allow_empty = False
    if not in_train:
        allow_empty = True

    transform = transforms.Compose([
        transforms.ColorJitter(.3, .3, .3, .3),
        transforms.RandomGrayscale(p=0.1)
    ])

    while True:
        if in_train:
            np.random.shuffle(index)
        images = []
        image_fns = []
        score_maps = []
        geo_maps = []
        training_masks = []
        gtso = []
        lbso = []
        gt_idxs = []
        im_id = 0
        for i in index:
            try:
                im_name = image_list[i]

                if in_train:
                    # 80% of the time swap the sample for one of the first
                    # ~19k list entries (synthetic data).
                    if random.uniform(0, 100) < 80:
                        im_name = image_list[int(
                            random.uniform(0,
                                           min(19000, image_list.shape[0] -
                                               1)))]

                if not os.path.exists(im_name):
                    continue

                im = cv2.imread(im_name)
                if im is None:
                    continue

                allow_empty = False

                name = os.path.basename(im_name)
                name = name[:-4]  # strip ".ext" (assumes 3-char extension)

                h, w, _ = im.shape
                # Derive the annotation file name from the image name.
                txt_fn = im_name.replace(
                    os.path.basename(im_name).split('.')[1], 'txt')
                base_name = os.path.basename(txt_fn)
                txt_fn_gt = '{0}/gt_{1}'.format(os.path.dirname(im_name),
                                                base_name)
                if (not (os.path.exists(txt_fn)
                         or os.path.exists(txt_fn_gt))) and not allow_empty:
                    continue

                # Let ~40% of text-free crops through as negative examples.
                allow_empty = random.randint(0, 100) < 40

                if os.path.exists(txt_fn_gt) and (
                        txt_fn_gt.find('/done/') != -1
                        or txt_fn_gt.find('/icdar-2015-Ch4/') != -1):
                    text_polys, text_tags, labels_txt = load_gt_annoataion(
                        txt_fn_gt,
                        txt_fn_gt.find('/icdar-2015-Ch4/') != -1)
                elif os.path.exists(txt_fn) and (
                        txt_fn.find('/Latin/') != -1
                        or txt_fn.find('/Arabic/') != -1
                        or txt_fn.find('/Chinese/') != -1
                        or txt_fn.find('/Japanese/') != -1
                        or txt_fn.find('/Bangla/') != -1):
                    try:
                        # Read text polygon coordinates and transcriptions.
                        text_polys, text_tags, labels_txt = load_annoataion(
                            txt_fn, im)
                    except Exception:
                        print(txt_fn)
                        import traceback
                        traceback.print_exc()
                        # Unparseable sample: remove both files from disk.
                        os.remove(im_name)
                        os.remove(txt_fn)
                        continue
                else:
                    text_polys, text_tags, labels_txt = load_annoataion(
                        txt_fn, im)

                if in_train:

                    if random.uniform(0, 100) < 50 or im.shape[
                            0] < 600 or im.shape[1] < 600:
                        # Randomly pad the borders (always for small images),
                        # shifting the polygons accordingly.
                        top = int(random.uniform(300, 500))
                        bottom = int(random.uniform(300, 500))
                        left = int(random.uniform(300, 500))
                        right = int(random.uniform(300, 500))
                        im = cv2.copyMakeBorder(im, top, bottom, left, right,
                                                cv2.BORDER_CONSTANT)
                        if len(text_polys) > 0:
                            text_polys[:, :, 0] += left
                            text_polys[:, :, 1] += top

                    # Random rotation is deliberately disabled ("and False").
                    if random.uniform(0, 100) < 30 and False:
                        im = random_rotation(im, text_polys)
                    if random.uniform(0, 100) < 30:
                        im = random_perspective(im, text_polys)

                    # Random anisotropic scaling of image and polygons.
                    scalex = random.uniform(0.5, 2)
                    scaley = scalex * random.uniform(0.8, 1.2)
                    im = cv2.resize(im,
                                    dsize=(int(im.shape[1] * scalex),
                                           int(im.shape[0] * scaley)))
                    text_polys[:, :, 0] *= scalex
                    text_polys[:, :, 1] *= scaley

                    if random.randint(0, 100) < 10:
                        im = np.invert(im)

                new_h, new_w, _ = im.shape
                resize_h = input_size
                resize_w = input_size
                if input_size == -1:
                    # Keep native size, 32-aligned, shrunk until < ~1 Mpx.
                    image_size = [
                        im.shape[1] // 32 * 32, im.shape[0] // 32 * 32
                    ]
                    while image_size[0] * image_size[1] > 1024 * 1024:
                        image_size[0] /= 1.2
                        image_size[1] /= 1.2
                        image_size[0] = int(image_size[0] // 32) * 32
                        image_size[1] = int(image_size[1] // 32) * 32

                    resize_h = int(image_size[1])
                    resize_w = int(image_size[0])

                # Random crop around the text regions.
                # NOTE(review): passes (resize_w, resize_w) — looks like it
                # should be (resize_w, resize_h); harmless while the target
                # is square, so left unchanged pending confirmation.
                scaled = cut_image(im, (resize_w, resize_w), text_polys)
                if scaled.shape[0] == 0 or scaled.shape[1] == 0:
                    continue

                if scaled.shape[1] != resize_w or scaled.shape[0] != resize_h:

                    scalex = scaled.shape[1] / resize_w
                    scaley = scaled.shape[0] / resize_h

                    # Drop crops that would need more than 2x upscaling.
                    if scalex < 0.5 or scaley < 0.5:
                        continue
                    scaled = cv2.resize(scaled,
                                        dsize=(int(resize_w), int(resize_h)))

                    if len(text_polys) > 0:
                        text_polys[:, :, 0] /= scalex
                        text_polys[:, :, 1] /= scaley

                im = scaled
                new_h, new_w, _ = im.shape

                # Photometric augmentation through torchvision transforms.
                pim = PIL.Image.fromarray(np.uint8(im))
                pim = transform(pim)

                if use_pyblur == 1 and random.uniform(0, 100) < 30:
                    pim = RandomizedBlur(pim)

                im = np.array(pim)
                if geo_type == 0:
                    # Segmentation targets: score map, geometry (4 sides +
                    # angle), training mask, per-pixel gt index, polygons
                    # and their text labels.
                    score_map, geo_map, training_mask, gt_idx, gt_out, labels_out = generate_rbox(
                        im, (new_h, new_w),
                        text_polys,
                        text_tags,
                        labels_txt,
                        vis=vis)
                else:
                    score_map, geo_map, training_mask, gt_idx, gt_out, labels_out = generate_rbox2(
                        im, (new_h, new_w),
                        text_polys,
                        text_tags,
                        labels_txt,
                        vis=vis)

                if score_map.sum() == 0 and (not allow_empty):
                    continue

                image_fns.append(im_name)
                images.append(im[:, :, :].astype(np.float32))
                gtso.append(gt_out)
                lbso.append(labels_out)
                training_masks.append(training_mask)
                score_maps.append(score_map)
                gt_idxs.append(gt_idx)
                geo_maps.append(geo_map)

                im_id += 1

                if len(images) == batch_size:
                    # np.float / np.int aliases were removed in NumPy 1.24;
                    # float64 / int64 match the original behaviour.
                    images = np.asarray(images, dtype=np.float64)
                    images /= 128
                    images -= 1

                    training_masks = np.asarray(training_masks, dtype=np.uint8)
                    score_maps = np.asarray(score_maps, dtype=np.uint8)
                    geo_maps = np.asarray(geo_maps, dtype=np.float64)
                    gt_idxs = np.asarray(gt_idxs, dtype=np.int64)

                    yield images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs
                    images = []
                    image_fns = []
                    score_maps = []
                    geo_maps = []
                    training_masks = []
                    gtso = []
                    lbso = []
                    gt_idxs = []
                    im_id = 0
            except Exception:
                import traceback
                traceback.print_exc()
                continue

        if not in_train:
            print("finish")
            yield None
            break
예제 #2
0
# How often (in iterations) to log training stats / run validation.
log_interval = 10
val_interval = 1

# ============================ step 1/5 data ============================

split_dir = os.path.join("rmb_split")
train_dir = os.path.join(split_dir, "train")
valid_dir = os.path.join(split_dir, "valid")

# ImageNet channel statistics used for normalisation.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random crop with padding, random grayscale.
# NOTE(review): RandomGrayscale with p=0.9 is unusually aggressive —
# confirm this probability is intended.
train_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomGrayscale(p=0.9),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Validation pipeline: deterministic resize + normalise only.
valid_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Build the dataset instances.
train_data = RMBDataset(data_dir=train_dir, transform=train_transform)
valid_data = RMBDataset(data_dir=valid_dir, transform=valid_transform)

# Build the DataLoader.
예제 #3
0
    def __init__(self,
                 mode,
                 image_size,
                 file_loc="data/train_new.csv",
                 shuffle=True,
                 fold=0,
                 do_transform=True):
        """Load the CSV, select the train/validation split, build transforms.

        ``target_cols`` collects every int64 column except ``fold``; the
        presence of a ``fold`` column decides whether images come from the
        ``train`` or ``test`` folder.
        """
        self.mode = mode
        self.image_size = image_size
        self.do_transform = do_transform

        data = pd.read_csv(file_loc)
        # Map each column name to the string form of its dtype.
        dtype_of = {col: str(dtype) for col, dtype in data.dtypes.items()}
        # Integer columns (minus the fold marker) are the prediction targets.
        self.target_cols = [
            col for col, dtype in dtype_of.items()
            if dtype == 'int64' and col != 'fold'
        ]
        self.img_folder = "train" if "fold" in data.columns else "test"

        # Train mode keeps every fold but the held-out one; evaluation uses
        # the held-out fold, or the whole frame for the test CSV.
        if self.mode == "train":
            self.df = data[data["fold"] != fold]
        elif self.img_folder == "train":
            self.df = data[data["fold"] == fold]
        else:
            self.df = data

        if shuffle:
            self.df = self.df.sample(frac=1.0)

        # Strong augmentation pipeline used for training images.
        self.img_transform = transforms.Compose([
            transforms.Resize((self.image_size, self.image_size)),
            transforms.RandomGrayscale(p=1),
            transforms.RandomHorizontalFlip(p=0.1),
            transforms.RandomAffine(10,
                                    translate=None,
                                    scale=None,
                                    shear=None,
                                    resample=0,
                                    fillcolor=0),
            transforms.RandomPerspective(distortion_scale=0.2, p=0.1),
            transforms.RandomRotation(10,
                                      resample=False,
                                      expand=False,
                                      center=None,
                                      fill=None),
            transforms.ColorJitter(brightness=0.1,
                                   contrast=0.1,
                                   saturation=0.1,
                                   hue=0.1),
            transforms.ToTensor(),
            transforms.RandomErasing(p=0.1,
                                     scale=(0.02, 0.1),
                                     ratio=(0.3, 3.3),
                                     inplace=True),
        ])
        # Weak pipeline: resize + forced grayscale only.
        self.weak_transform = transforms.Compose([
            transforms.Resize((self.image_size, self.image_size)),
            transforms.RandomGrayscale(p=1),
            transforms.ToTensor()
        ])
예제 #4
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process worker for (distributed) MoCo pre-training.

    Initialises the process group, builds the MoCo model wrapped in
    DistributedDataParallel, optionally resumes from a checkpoint, builds
    the two-crop augmented data loader, then trains for ``args.epochs``
    epochs, saving periodic checkpoints and the best top-1 model.

    Args:
        gpu: GPU index assigned to this process (may be None).
        ngpus_per_node: GPUs on this node; used to compute the global rank
            and to split batch size / workers per process.
        args: parsed command-line arguments namespace.
    """
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder.MoCo(models.__dict__[args.arch], args.moco_dim,
                              args.moco_k, args.moco_m, args.moco_t, args.mlp)
    print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            message = f"=> loaded checkpoint '{args.resume}' " + \
                  f"(epoch {checkpoint['epoch']}, lr: {optimizer.param_groups[0]['lr']})"
            print(message)
            # Mirror the resume message into the persistent log file.
            with open(args.logfile, 'a') as f:
                print(message, file=f)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'base')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(args.image_size, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(args.image_size, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    # TwoCropsTransform yields two independently augmented views per image.
    train_dataset = datasets.ImageFolder(
        traindir,
        moco.loader.TwoCropsTransform(transforms.Compose(augmentation)))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    max_top1Acc = 0.0
    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        avgloss, accTop1, accTop5 = train(train_loader, model, criterion,
                                          optimizer, epoch, args)
        with open(args.logfile, 'a') as f:
            print(f'{avgloss:.4f}\t{accTop1:.2f}\t{accTop5:.2f}', file=f)
        # if not args.multiprocessing_distributed or (args.multiprocessing_distributed
        #         and args.rank % ngpus_per_node == 0):
        # Periodic checkpoint every args.freq_save epochs.
        if (epoch + 1) % args.freq_save == 0:
            filename = os.path.join(
                args.checkpoint_dir,
                f'checkpoint_{epoch+1:04d}_Top1_{accTop1:.2f}.pth.tar')
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename=filename)

        # Keep a separate "best model" checkpoint by top-1 accuracy.
        if max_top1Acc < accTop1:
            max_top1Acc = accTop1
            filename = os.path.join(args.checkpoint_dir,
                                    f'checkpoint_best_model.pth.tar')
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename=filename)
    with open(args.logfile, 'a') as f:
        print(
            f'End time : {datetime.now():%Y-%m-%d} {datetime.now():%H:%M:%S}',
            file=f)
예제 #5
0
파일: ocr_gen.py 프로젝트: cnzeki/E2E-MLT
def generator(
        batch_size=4,
        train_list='/home/klara/klara/home/DeepSemanticText/resources/ims2.txt',
        in_train=True,
        rgb=False,
        norm_height=32):
    """Endlessly yield bucketed batches of text-line crops for OCR training.

    Each list entry is ``<image path><sep><transcription>`` where the
    separator is a space or a comma.  Lines are resized to ``norm_height``,
    assigned to the closest width bucket, augmented, and accumulated; a
    bucket is yielded as soon as it reaches its batch size.

    Args:
        batch_size: base per-bucket batch size (wider buckets use smaller
            batches).
        train_list: file with one annotated sample per line.
        in_train: shuffle and augment when True; when False, iterate once
            and terminate with a final ``yield None``.
        rgb: load 3-channel images instead of grayscale.
        norm_height: fixed height every line image is resized to.

    Yields:
        ``(images, labels, label_lengths)`` — images normalised to roughly
        [-1, 1), labels as a flat list of codec indices.
    """
    image_list = np.array(get_images(train_list))
    print('{} training images in {}'.format(image_list.shape[0], train_list))
    index = np.arange(0, image_list.shape[0])

    transform = transforms.Compose([
        transforms.ColorJitter(.3, .3, .3, .3),
        transforms.RandomGrayscale(p=0.1)
    ])

    # Wider buckets get smaller batches: halve every 10th bucket.
    # Integer division keeps batch sizes ints — the original "/=" produced
    # floats, which for odd batch sizes never matched len(...) below.
    batch_sizes = []
    cb = batch_size
    for i in range(0, len(buckets)):
        batch_sizes.append(cb)
        if i % 10 == 0 and cb > 2:
            cb //= 2

    max_samples = len(image_list) - 1
    bucket_images = []
    bucket_labels = []
    bucket_label_len = []

    for b in range(0, len(buckets)):
        bucket_images.append([])
        bucket_labels.append([])
        bucket_label_len.append([])

    while True:
        if in_train:
            np.random.shuffle(index)

        for i in index:
            try:
                image_name = image_list[i]

                # Split "path<sep>text" on space, falling back to comma.
                src_del = " "
                spl = image_name.split(" ")
                if len(spl) == 1:
                    spl = image_name.split(",")
                    src_del = ","
                image_name = spl[0].strip()
                gt_txt = ''
                if len(spl) > 1:
                    gt_txt = ""
                    delim = ""
                    for k in range(1, len(spl)):
                        gt_txt += delim + spl[k]
                        delim = src_del
                    # Strip surrounding double quotes, if present.
                    if len(gt_txt
                           ) > 1 and gt_txt[0] == '"' and gt_txt[-1] == '"':
                        gt_txt = gt_txt[1:-1]

                if len(gt_txt) == 0:
                    continue

                if image_name[len(image_name) - 1] == ',':
                    image_name = image_name[0:-1]

                if not os.path.exists(image_name):
                    continue

                if rgb:
                    im = cv2.imread(image_name)
                else:
                    im = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
                if im is None:
                    continue

                if image_name.find('/chinese_0/') != -1:
                    # Vertical chinese text: rotate to horizontal.
                    im = cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE)

                # Skip clearly vertical images with long transcriptions.
                if im.shape[0] > im.shape[1] and len(gt_txt) > 4:
                    continue

                # Scale to norm_height, jitter the width, then snap to the
                # nearest bucket width.
                scale = norm_height / float(im.shape[0])
                width = int(im.shape[1] * scale) + random.randint(
                    -2 * norm_height, 2 * norm_height)

                best_diff = width
                bestb = 0
                for b in range(0, len(buckets)):
                    if best_diff > abs(width - buckets[b]):
                        best_diff = abs(width - buckets[b])
                        bestb = b

                # 10% of the time jitter to a neighbouring bucket.
                if random.randint(0, 100) < 10:
                    bestb += random.randint(-1, 1)
                    bestb = max(0, bestb)
                    bestb = min(bestb, (len(buckets) - 1))

                im = cv2.resize(im, (int(buckets[bestb]), norm_height))
                if not rgb:
                    im = im.reshape(im.shape[0], im.shape[1], 1)

                if in_train:
                    if random.randint(0, 100) < 10:
                        im = np.invert(im)
                    if not use_pyblur and random.randint(0, 100) < 10:
                        im = cv2.blur(im, (3, 3))
                        if not rgb:
                            im = im.reshape(im.shape[0], im.shape[1], 1)

                    # Random shear via an affine warp (zero rotation).
                    if random.randint(0, 100) < 10:
                        warp_mat = cv2.getRotationMatrix2D(
                            (im.shape[1] / 2, im.shape[0] / 2), 0, 1)
                        warp_mat[0, 1] = random.uniform(-0.1, 0.1)
                        im = cv2.warpAffine(im, warp_mat,
                                            (im.shape[1], im.shape[0]))

                pim = PIL.Image.fromarray(np.uint8(im))
                pim = transform(pim)

                if use_pyblur:
                    if random.randint(0, 100) < 10:
                        pim = RandomizedBlur(pim)

                im = np.array(pim)

                bucket_images[bestb].append(im[:, :, :].astype(np.float32))

                # Encode the transcription; unknown chars map to index 3.
                gt_labels = []
                for k in range(len(gt_txt)):
                    if gt_txt[k] in codec_rev:
                        gt_labels.append(codec_rev[gt_txt[k]])
                    else:
                        print('Unknown char: {0}'.format(gt_txt[k]))
                        gt_labels.append(3)

                # Arabic reads right-to-left.  NOTE(review): ud.name() can
                # raise for unnamed characters; the outer except swallows it.
                if 'ARABIC' in ud.name(gt_txt[0]):
                    gt_labels = gt_labels[::-1]

                bucket_labels[bestb].extend(gt_labels)
                bucket_label_len[bestb].append(len(gt_labels))

                if len(bucket_images[bestb]) == batch_sizes[bestb]:
                    # np.float was removed in NumPy 1.24; float64 matches
                    # the original behaviour.
                    images = np.asarray(bucket_images[bestb],
                                        dtype=np.float64)
                    images /= 128
                    images -= 1

                    yield images, bucket_labels[bestb], bucket_label_len[bestb]
                    max_samples += 1
                    max_samples = min(max_samples, len(image_list) - 1)
                    bucket_images[bestb] = []
                    bucket_labels[bestb] = []
                    bucket_label_len[bestb] = []

            except Exception:
                import traceback
                traceback.print_exc()
                continue

        if not in_train:
            print("finish")
            yield None
            break
예제 #6
0
def main():
    """Entry point: MoCo-style contrastive pre-training with a memory queue.

    Builds the augmented data loader, a query/key encoder pair (the key
    encoder is a momentum copy of the query encoder), the MoCo memory
    contrast and NCE criterion, then trains for ``args.epochs`` epochs,
    logging to tensorboard and checkpointing every ``args.save_freq``
    epochs.
    """

    args = parse_option()

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # set the data loader
    data_folder = os.path.join(args.data_folder, 'train')
    # data_folder = args.data_folder

    image_size = 224
    # ImageNet channel statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = transforms.Normalize(mean=mean, std=std)

    # Data augmentation
    # NOTE(review): train_transform is built here but the dataset below is
    # constructed from config.transforms_q / config.transforms_k, so this
    # pipeline is only used to validate args.aug — confirm intended.
    if args.aug == 'NULL':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    elif args.aug == 'CJ':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        raise NotImplementedError('augmentation not supported: {}'.format(
            args.aug))

    # TOMO
    # train_dataset = ImageFolderInstance(data_folder, transform=train_transform, two_crop=True)#args.moco)
    train_dataset = config.dataset['cls'](
        transform=[config.transforms_q, config.transforms_k],
        **config.dataset['params'])
    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=config.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=config.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    # create model and optimizer
    n_data = len(train_dataset)

    if args.model == 'resnet50':
        model_q = config.model['cls'](**config.model['params'])
        model_k = config.model['cls'](**config.model['params'])
    #     model_q = InsResNet50()
    #     model_k = InsResNet50()
    # elif args.model == 'resnet50x2':
    #     model_q = InsResNet50(width=2)
    #     model_k = InsResNet50(width=2)
    # elif args.model == 'resnet50x4':
    #     model_q = InsResNet50(width=4)
    #     model_k = InsResNet50(width=4)
    else:
        raise NotImplementedError('model not supported {}'.format(args.model))

    # copy weights from `model_q' to `model_k'
    moment_update(model_q, model_k, 0)

    # set the contrast memory (queue of K keys) and criterion
    contrast = MemoryMoCo(config.model['params']['params']['num_classes'],
                          n_data, args.nce_k, args.nce_t,
                          args.softmax).cuda(args.gpu)

    criterion = NCESoftmaxLoss() if args.softmax else NCECriterion(n_data)
    criterion = criterion.cuda(args.gpu)

    model_q = model_q.cuda()
    model_k = model_k.cuda()

    # Only the query encoder is optimised; the key encoder is updated by
    # momentum in train_moco.
    optimizer = torch.optim.SGD(model_q.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True
    '''
    # mixed precision training, speeds up training, however is likely to harm the downstream classification.
    if args.amp:
        model_q, optimizer = amp.initialize(model_q, optimizer, opt_level=args.opt_level)
        optimizer_ema = torch.optim.SGD(model_k.parameters(),
                                        lr=0,
                                        momentum=0,
                                        weight_decay=0)
        model_k, optimizer_ema = amp.initialize(model_k, optimizer_ema, opt_level=args.opt_level)
    '''

    args.start_epoch = 1
    '''
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            # checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            model_q.load_state_dict(checkpoint['model_q'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            contrast.load_state_dict(checkpoint['contrast'])
            model_k.load_state_dict(checkpoint['model_k'])

            if args.amp and checkpoint['opt'].amp:
                print('==> resuming amp state_dict')
                amp.load_state_dict(checkpoint['amp'])

            print("=> loaded successfully '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            del checkpoint
            torch.cuda.empty_cache()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    '''

    # tensorboard
    logger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2)

    # routine
    for epoch in range(args.start_epoch, args.epochs + 1):

        adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        loss, prob = train_moco(epoch, train_loader, model_q, model_k,
                                contrast, criterion, optimizer, args)
        time2 = time.time()
        print('epoch {}, total time {:.2f}'.format(epoch, time2 - time1))

        # tensorboard logger
        logger.log_value('ins_loss', loss, epoch)
        logger.log_value('ins_prob', prob, epoch)
        logger.log_value('learning_rate', optimizer.param_groups[0]['lr'],
                         epoch)

        # save model
        if epoch % args.save_freq == 0:
            print('==> Saving...')
            state = {
                'opt': args,
                'model_q': model_q.state_dict(),
                'contrast': contrast.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
            }
            state['model_k'] = model_k.state_dict()
            if args.amp:
                state['amp'] = amp.state_dict()
            save_file = os.path.join(
                args.model_folder,
                'ckpt_epoch_{epoch}.pth'.format(epoch=epoch))
            torch.save(state, save_file)
            # help release GPU memory
            del state

        torch.cuda.empty_cache()
예제 #7
0
    def __init__(self, config):
        """Build the data-augmentation pipeline described by *config*.

        Augmentation hyper-parameters are read from ``config.CONFIG_OPTIONS``
        (falling back to defaults), a textual summary of them is recorded in
        ``self.TRANSFORM_SUMMARY``, and the composed torchvision transform is
        stored in ``self.T_augm``.
        """
        super().__init__(config)

        opts = config.CONFIG_OPTIONS

        input_size = opts.get(
            P.KEY_INPUT_SHAPE,
            P.GLB_PARAMS[P.KEY_DATASET_METADATA][P.KEY_DS_INPUT_SHAPE])[1]
        rel_delta = opts.get(P.KEY_DA_REL_DELTA, 0.25)
        # Absolute size jitter in pixels, derived from the relative delta.
        delta = int(rel_delta * input_size)

        jit_brightness = opts.get(P.KEY_DA_JIT_BRIGHTNESS, 0.1)
        jit_contrast = opts.get(P.KEY_DA_JIT_CONTRAST, 0.1)
        jit_saturation = opts.get(P.KEY_DA_JIT_SATURATION, 0.1)
        jit_hue = opts.get(P.KEY_DA_JIT_HUE, 20 / 360)
        jit_p = opts.get(P.KEY_DA_JIT_P, 0.5)
        grayscale_p = opts.get(P.KEY_DA_GREYSCALE_P, 0.2)
        persp_scale = opts.get(P.KEY_DA_PERSP_SCALE, 0.25)
        persp_p = opts.get(P.KEY_DA_PERSP_P, 0.3)
        resize_p = opts.get(P.KEY_DA_RESIZE_P, 0.3)
        rot_degrees = opts.get(P.KEY_DA_ROT_DEGREES, 180)
        rot_p = opts.get(P.KEY_DA_ROT_P, 0.3)
        transl_p = opts.get(P.KEY_DA_TRANSL_P, 0.5)

        # A textual summary of the transformation: concatenated name/value
        # pairs with no separators (same format as the original string).
        self.TRANSFORM_SUMMARY = "".join(
            key + str(val) for key, val in (
                ("delta", delta),
                ("jit_brightness", jit_brightness),
                ("jit_contrast", jit_contrast),
                ("jit_saturation", jit_saturation),
                ("jit_hue", jit_hue),
                ("jit_p", jit_p),
                ("grayscale_p", grayscale_p),
                ("persp_scale", persp_scale),
                ("persp_p", persp_p),
                ("resize_p", resize_p),
                ("rot_degrees", rot_degrees),
                ("rot_p", rot_p),
                ("transl_p", transl_p),
            ))

        # Random blur / random noise could also be inserted into this
        # pipeline in the future.
        self.T_augm = transforms.Compose([
            transforms.RandomApply([
                transforms.ColorJitter(brightness=jit_brightness,
                                       contrast=jit_contrast,
                                       saturation=jit_saturation,
                                       hue=jit_hue)
            ], p=jit_p),
            transforms.RandomGrayscale(p=grayscale_p),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([
                # Random perspective tends to shrink the image, so we
                # enlarge it beforehand.
                transforms.Resize(input_size + delta // 2),
                transforms.RandomPerspective(distortion_scale=persp_scale,
                                             p=1.0)
            ], p=persp_p),
            transforms.RandomApply([
                # Random rescale to a side length within
                # [input_size - delta, input_size + delta].
                transforms.Lambda(
                    RandomResize(input_size - delta, input_size + delta))
            ], p=resize_p),
            transforms.RandomApply(
                [transforms.RandomRotation(degrees=rot_degrees, expand=True)],
                p=rot_p),
            # Random occlusion / translation: take a fixed-size central crop,
            # then a smaller fixed-size crop at a random position.
            transforms.RandomApply([
                transforms.CenterCrop(input_size + delta),
                transforms.RandomCrop(input_size)
            ], p=transl_p),
        ])
예제 #8
0
# Hyper-parameter setup
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
train_loss_everyepoch = 0
train_accuracy_everyepoch = 0
test_loss_everyepoch = 0
test_accuracy_everyepoch = 0

# Data
print('==> Preparing data..')
# torchvision.transforms is PyTorch's image-preprocessing package; Compose is
# generally used to chain several preprocessing steps into a single pipeline:
transform_train = transforms.Compose([  # chain the individual transforms via Compose
    transforms.RandomCrop(32, padding=4),  # zero-pad 4 px on each side, then randomly crop back to 32x32
    transforms.RandomHorizontalFlip(),  # horizontally flip the given PIL image with probability 0.5
    # transforms.RandomAffine(5.0),         # not available on python2.7
    transforms.RandomGrayscale(p=0.1),  # convert the image to grayscale with probability p
    transforms.ColorJitter(brightness=0.3,
                           contrast=0.3,
                           saturation=0.4,
                           hue=0.4),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),  # per-channel (R,G,B) means and stds used for normalization
])

# Test-time pipeline: no augmentation, only tensor conversion + normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

## if you cannot run the program because of "OUT OF MEMORY", you can decrease the batch_size properly.
예제 #9
0
def get_dataset(args):
    """Create CIFAR train/val datasets and loaders for contrastive training.

    Supports ``args.dataset`` in {'cifar-10', 'cifar-100'} and
    ``args.train_type`` in {'contrastive', 'linear_eval', 'test'}.

    Returns:
        For 'contrastive':
            (train_loader, train_dst, val_loader, val_dst, train_sampler)
            where the train loader is sharded with a DistributedSampler.
        Otherwise:
            (train_loader, train_dst, val_loader, val_dst)

    Raises:
        NotImplementedError: on an unknown learning type or dataset name
            (the original silently returned None for an unknown dataset).
    """

    ### color augmentation (SimCLR-style, scaled by jitter strength) ###
    strength = args.color_jitter_strength
    color_jitter = transforms.ColorJitter(0.8 * strength, 0.8 * strength,
                                          0.8 * strength, 0.2 * strength)
    rnd_color_jitter = transforms.RandomApply([color_jitter], p=0.8)
    rnd_gray = transforms.RandomGrayscale(p=0.2)

    learning_type = args.train_type

    # Augmented pipeline shared by 'contrastive' and 'linear_eval' training
    # (identical for both CIFAR variants in the original code).
    augmented_train = transforms.Compose([
        rnd_color_jitter,
        rnd_gray,
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(32),
        transforms.ToTensor(),
    ])
    plain_test = transforms.Compose([transforms.ToTensor()])

    if args.dataset == 'cifar-10':
        if learning_type == 'contrastive':
            transform_train = augmented_train
            # Contrastive evaluation reuses the augmented training views.
            transform_test = transform_train
        elif learning_type == 'linear_eval':
            transform_train = augmented_train
            transform_test = plain_test
        elif learning_type == 'test':
            transform_train = transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(32),
                transforms.ToTensor(),
            ])
            transform_test = plain_test
        else:
            raise NotImplementedError('wrong learning type')

        dataset_cls = CIFAR10
        # The original cifar-10 loaders did not pin memory; cifar-100 did.
        pin_memory = False

    elif args.dataset == 'cifar-100':
        if learning_type == 'contrastive':
            transform_train = augmented_train
            transform_test = transform_train
        elif learning_type == 'linear_eval':
            transform_train = augmented_train
            transform_test = plain_test
        elif learning_type == 'test':
            transform_train = transforms.Compose([
                transforms.RandomCrop(32,
                                      padding=4),  # Different from cifar-10
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            transform_test = plain_test
        else:
            raise NotImplementedError('wrong learning type')

        dataset_cls = CIFAR100
        pin_memory = True

    else:
        raise NotImplementedError('wrong dataset: {}'.format(args.dataset))

    train_dst = dataset_cls(root='./Dataset',
                            train=True,
                            download=True,
                            transform=transform_train,
                            contrastive_learning=learning_type)
    val_dst = dataset_cls(root='./Dataset',
                          train=False,
                          download=True,
                          transform=transform_test,
                          contrastive_learning=learning_type)

    if learning_type == 'contrastive':
        # Distributed training: each replica sees its own shard.
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dst,
            num_replicas=args.ngpu,
            rank=args.local_rank,
        )
        train_loader = torch.utils.data.DataLoader(
            train_dst,
            batch_size=args.batch_size,
            num_workers=4,
            pin_memory=pin_memory,
            shuffle=(train_sampler is None),  # sampler is set, so no shuffle
            sampler=train_sampler,
        )
        val_loader = torch.utils.data.DataLoader(
            val_dst,
            batch_size=100,
            num_workers=4,
            pin_memory=pin_memory,
            shuffle=False,
        )
        return train_loader, train_dst, val_loader, val_dst, train_sampler

    # Non-distributed loaders for 'linear_eval' and 'test'.
    train_loader = torch.utils.data.DataLoader(train_dst,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)
    val_loader = torch.utils.data.DataLoader(val_dst,
                                             batch_size=100,
                                             shuffle=False,
                                             num_workers=4)
    return train_loader, train_dst, val_loader, val_dst
예제 #10
0
File: util.py  Project: uyuutosa/aimaker
    def parseTransform(self, names):
        """Parse an underscore-separated transform spec into transform objects.

        Each token of *names* (split on ``'_'``) selects one transform by
        keyword; the characters around the keyword are parsed as its
        parameters. Returns the list of instantiated transforms, in order.

        Raises:
            NotImplementedError: if a token matches no known transform.

        NOTE(review): parameters are extracted with ``str.strip``, which
        removes a *set of characters* from both ends rather than a prefix;
        this works for purely numeric payloads but would mangle values that
        share letters with the keyword — confirm the spec grammar.
        """
        lst = []
        for name in names.split('_'):
            if "resize" in name:
                value = name.strip("resize")
                lst += [tt.Resize(self._parse_x(value))]
            elif "normalize" in name:
                value = self._parse_comma(name.strip("normalize"))
                # Two values = single-channel (mean, std); six = per-RGB.
                if len(value) == 2:
                    lst += [tt.Normalize(value[0:1], value[1:2])]
                else:
                    lst += [tt.Normalize(value[0:3], value[3:6])]
            elif "toTensor" in name:
                lst += [tt.ToTensor()]
            elif "randomCrop" in name:
                value = name.strip("randomCrop")
                lst += [tt.RandomCrop(self._parse_x(value))]
            elif "centerCrop" in name:
                value = name.strip("centerCrop")
                lst += [tt.CenterCrop(self._parse_x(value))]
            elif "randomVerticalFlip" in name:
                lst += [tt.RandomVerticalFlip()]
            elif "randomResizedCrop" in name:
                v = self._parse_comma(name.strip("randomResizedCrop"))
                lst += [
                    tt.RandomResizedCrop(int(v[0]), v[1:3], v[3:5], int(v[5]))
                ]
            elif "grayScale" in name:
                # NOTE(review): torchvision's transform is ``Grayscale``;
                # ``tt.grayScale`` only works if ``tt`` provides that alias —
                # confirm against the actual ``tt`` module.
                value = int(name.strip("grayScale"))
                lst += [tt.grayScale(value)]
            elif "randomRotation" in name:
                striped_name = name.strip("randomRotation")
                if ',' in striped_name:
                    v = self._parse_comma(striped_name)
                else:
                    v = int(striped_name)
                lst += [tt.RandomRotation(v)]
            elif "randomGrayscale" in name:
                # Bug fix: this branch previously did ``v = float(value)``,
                # reading the stale ``value`` left over from an earlier
                # branch (or raising NameError if none had run yet).
                v = float(name.strip("randomGrayscale"))
                lst += [tt.RandomGrayscale(v)]
            elif "toPILImage" in name:
                lst += [tt.ToPILImage()]
            elif "randomHorizontalFlip" in name:
                lst += [tt.RandomHorizontalFlip()]
            elif "randomBackground" in name:
                lst += [tt.RandomBackground(self.setting['data']['base'])]
            elif "pad" in name:
                striped_name = name.strip("pad")
                v = int(striped_name)
                lst += [tt.Pad(v)]
            elif "randomPad" in name:
                striped_name = name.strip("randomPad")
                v = self._parse_comma(striped_name)
                lst += [my_transforms.RandomPad(v[0], v[1])]
            elif "colorJitter" in name:
                v = self._parse_comma(name.strip("colorJitter"))
                # tt.ColorJitter defaults:
                # brightness=0, contrast=0, saturation=0, hue=0
                lst += [
                    tt.ColorJitter(brightness=v[0],
                                   contrast=v[1],
                                   saturation=v[2],
                                   hue=v[3])
                ]
            elif 'toNumpy' in name:
                lst += [my_transforms.ToNumpy()]
            elif 'correctExif' in name:
                lst += [my_transforms.CorrectExif()]
            elif 'randomAffine' in name:
                value = self._parse_comma(name.strip('randomAffine'))
                lst += [
                    tt.RandomAffine(degrees=value[0:2],
                                    translate=value[2:4],
                                    scale=value[4:6],
                                    shear=value[6:8])
                ]
            elif 'humanCrop' in name:
                value = self._parse_comma(name.strip("humanCrop"))
                lst += [
                    my_transforms.HumanCrop(margin=value[0],
                                            weight_path=self.setting['data']
                                            ['base']['openPosePath'],
                                            scale=value[1],
                                            gpu_ids=str(int(value[2])))
                ]
            elif 'toHSV' in name:
                lst += [my_transforms.ToHSV()]
            elif name == 'None':
                pass
            else:
                raise NotImplementedError(
                    "could not parse '{}'; this transform is not implemented.".
                    format(name))

        return lst
예제 #11
0
# Command-line options for this script (the parser itself is defined above).
parser.add_argument('--exp', default='./cifar', type=str, help='experimentdir')
parser.add_argument('--type', default='10', type=int, help='cifar10 or 100')

args = parser.parse_args()
# NOTE(review): the meaning of setup_runtime's first argument (2) is not
# visible from here — confirm against its definition.
setup_runtime(2, [args.device])
device = 'cuda' if torch.cuda.is_available() else 'cpu'
knn_dim = 4096  # feature dimension, presumably for kNN evaluation — TODO confirm
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
# Data
print('==> Preparing data..')  #########################
# Train-time augmentation: resize, random 224 crop, color jitter, random
# grayscale, horizontal flip, then normalization.
transform_train = tfs.Compose([
    tfs.Resize(256),
    tfs.RandomResizedCrop(size=224, scale=(0.2, 1.)),
    tfs.ColorJitter(0.4, 0.4, 0.4, 0.4),
    tfs.RandomGrayscale(p=0.2),
    tfs.RandomHorizontalFlip(),
    tfs.ToTensor(),
    tfs.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Test-time pipeline: deterministic resize + center crop + normalization.
transform_test = tfs.Compose([
    tfs.Resize(256),
    tfs.CenterCrop(224),
    tfs.ToTensor(),
    tfs.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
if args.type == 10:
    trainset = CIFAR10Instance(root=args.datadir,
                               train=True,
                               download=True,
예제 #12
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point for (distributed) UniMoCo pre-training.

    Sets up the process group, builds the UniMoCo model, criterion and
    optimizer, constructs the (optionally semi-supervised) ImageFolder data
    pipeline, then runs the training loop, saving a checkpoint each epoch on
    the master process.
    """
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = UniMoCo(models.__dict__[args.arch], args.moco_dim, args.moco_k,
                    args.moco_m, args.moco_t, args.mlp)
    print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = UnifiedContrastive().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    train_dataset = datasets.ImageFolder(
        traindir, TwoCropsTransform(transforms.Compose(augmentation)))

    # build a semi-supervised dataset according to the file
    if os.path.exists(args.supervised_list):
        supervised_set = set()
        for line in open(args.supervised_list, 'r'):
            supervised_set.add(line.strip())
        samples = []
        for path, target in train_dataset.samples:
            filename = os.path.basename(path)
            if filename in supervised_set:
                samples.append((path, target))
            else:
                # images outside the supervised list get the sentinel label -1
                samples.append((path, -1))
        assert len(supervised_set) == len(
            [label for _, label in samples if label != -1])
        # Bug fix: install the relabeled sample list on the dataset. It was
        # previously built and asserted but never assigned, so the
        # semi-supervised split had no effect on training.
        train_dataset.samples = samples
        train_dataset.targets = [label for _, label in samples]
        print(
            f'Training with {len(supervised_set)} supervised images and {len(samples)-len(supervised_set)} unsupervised images.'
        )
    else:
        print("Running with fully supervised representation learning.")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # checkpoint only on the master process (rank 0 per node)
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename='checkpoint_{:04d}.pth.tar'.format(epoch))
def main():
    """Linear-evaluation entry point.

    Loads a pre-trained encoder (InsResNet50 variant), freezes it in eval
    mode, and trains only a linear classifier on top of it, logging train/test
    accuracy and loss to tensorboard and checkpointing the best model.
    """

    global best_acc1
    best_acc1 = 0

    args = parse_option()

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # set the data loader
    train_folder = os.path.join(args.data_folder, 'train')
    val_folder = os.path.join(args.data_folder, 'val')

    image_size = 224
    crop_padding = 32
    # ImageNet channel statistics for normalization.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = transforms.Normalize(mean=mean, std=std)

    if args.aug == 'NULL':
        # Minimal augmentation: random resized crop + horizontal flip.
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    elif args.aug == 'CJ':
        # Adds random grayscale and color jitter on top of 'NULL'.
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(image_size, scale=(args.crop, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        raise NotImplementedError('augmentation not supported: {}'.format(
            args.aug))

    train_dataset = datasets.ImageFolder(train_folder, train_transform)
    # Validation uses a deterministic resize + center crop.
    val_dataset = datasets.ImageFolder(
        val_folder,
        transforms.Compose([
            transforms.Resize(image_size + crop_padding),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ]))

    print(len(train_dataset))
    train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    # create model and optimizer
    if args.model == 'resnet50':
        model = InsResNet50()
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 1)
    elif args.model == 'resnet50x2':
        model = InsResNet50(width=2)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 2)
    elif args.model == 'resnet50x4':
        model = InsResNet50(width=4)
        classifier = LinearClassifierResNet(args.layer, args.n_label, 'avg', 4)
    else:
        raise NotImplementedError('model not supported {}'.format(args.model))

    print('==> loading pre-trained model')
    ckpt = torch.load(args.model_path)
    model.load_state_dict(ckpt['model'])
    print("==> loaded checkpoint '{}' (epoch {})".format(
        args.model_path, ckpt['epoch']))
    print('==> done')

    model = model.cuda()
    classifier = classifier.cuda()

    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)

    # Only the classifier's parameters are optimized; the encoder is left
    # untouched (and kept in eval mode below).
    if not args.adam:
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=args.learning_rate,
                                     betas=(args.beta1, args.beta2),
                                     weight_decay=args.weight_decay,
                                     eps=1e-8)

    model.eval()
    cudnn.benchmark = True

    # set mixed precision training
    # if args.amp:
    #     model = amp.initialize(model, opt_level=args.opt_level)
    #     classifier, optimizer = amp.initialize(classifier, optimizer, opt_level=args.opt_level)

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            # checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            classifier.load_state_dict(checkpoint['classifier'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_acc1 = checkpoint['best_acc1']
            # presumably saved as a tensor; moved back onto the GPU — confirm
            best_acc1 = best_acc1.cuda()
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            if 'opt' in checkpoint.keys():
                # resume optimization hyper-parameters
                print('=> resume hyper parameters')
                if 'bn' in vars(checkpoint['opt']):
                    print('using bn: ', checkpoint['opt'].bn)
                if 'adam' in vars(checkpoint['opt']):
                    print('using adam: ', checkpoint['opt'].adam)
                if 'cosine' in vars(checkpoint['opt']):
                    print('using cosine: ', checkpoint['opt'].cosine)
                args.learning_rate = checkpoint['opt'].learning_rate
                # args.lr_decay_epochs = checkpoint['opt'].lr_decay_epochs
                args.lr_decay_rate = checkpoint['opt'].lr_decay_rate
                args.momentum = checkpoint['opt'].momentum
                args.weight_decay = checkpoint['opt'].weight_decay
                args.beta1 = checkpoint['opt'].beta1
                args.beta2 = checkpoint['opt'].beta2
            # help release memory before training starts
            del checkpoint
            torch.cuda.empty_cache()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # set cosine annealing scheduler
    if args.cosine:

        # last_epoch = args.start_epoch - 2
        # eta_min = args.learning_rate * (args.lr_decay_rate ** 3) * 0.1
        # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min, last_epoch)

        eta_min = args.learning_rate * (args.lr_decay_rate**3) * 0.1
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.epochs, eta_min, -1)
        # dummy loop to catch up with current epoch
        for i in range(1, args.start_epoch):
            scheduler.step()

    # tensorboard
    logger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2)

    # routine
    for epoch in range(args.start_epoch, args.epochs + 1):

        # NOTE: the scheduler is stepped at the start of the epoch, before
        # training — this matches the original code's behavior.
        if args.cosine:
            scheduler.step()
        else:
            adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        train_acc, train_acc5, train_loss = train(epoch, train_loader, model,
                                                  classifier, criterion,
                                                  optimizer, args)
        time2 = time.time()
        print('train epoch {}, total time {:.2f}'.format(epoch, time2 - time1))

        logger.log_value('train_acc', train_acc, epoch)
        logger.log_value('train_acc5', train_acc5, epoch)
        logger.log_value('train_loss', train_loss, epoch)
        logger.log_value('learning_rate', optimizer.param_groups[0]['lr'],
                         epoch)

        print("==> testing...")
        test_acc, test_acc5, test_loss = validate(val_loader, model,
                                                  classifier, criterion, args)

        logger.log_value('test_acc', test_acc, epoch)
        logger.log_value('test_acc5', test_acc5, epoch)
        logger.log_value('test_loss', test_loss, epoch)

        # save the best model
        if test_acc > best_acc1:
            best_acc1 = test_acc
            state = {
                'opt': args,
                'epoch': epoch,
                'classifier': classifier.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }
            save_name = '{}_layer{}.pth'.format(args.model, args.layer)
            save_name = os.path.join(args.save_folder, save_name)
            print('saving best model!')
            torch.save(state, save_name)

        # save model
        if epoch % args.save_freq == 0:
            print('==> Saving...')
            state = {
                'opt': args,
                'epoch': epoch,
                'classifier': classifier.state_dict(),
                'best_acc1': test_acc,
                'optimizer': optimizer.state_dict(),
            }
            save_name = 'ckpt_epoch_{epoch}.pth'.format(epoch=epoch)
            save_name = os.path.join(args.save_folder, save_name)
            print('saving regular model!')
            torch.save(state, save_name)

        # tensorboard logger
        pass
예제 #14
0
def main():
    """Train a ResNet18 encoder with a momentum (MoCo-style) contrastive setup.

    Builds the train/val pipelines, an encoder plus its momentum copy, an NCA
    cross-entropy criterion over the full set of training labels, and runs the
    epoch loop with checkpointing of the best validation accuracy.

    Relies on module-level globals: ``args``, ``sv_name``, ``logs_dir``,
    ``checkpoint_dir``.
    """
    global args, sv_name, logs_dir, checkpoint_dir

    write_arguments_to_file(
        args, os.path.join('./', sv_name, sv_name + '_arguments.txt'))

    use_cuda = torch.cuda.is_available()

    if use_cuda:
        torch.backends.cudnn.enabled = True
        # auto-tune conv algorithms; safe because input sizes are fixed (256x256)
        cudnn.benchmark = True

    # ImageNet normalization statistics
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Heavy augmentation for training; plain resize for validation.
    train_data_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomGrayscale(p=0.2),
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])

    val_data_transform = transforms.Compose(
        [transforms.Resize((256, 256)),
         transforms.ToTensor(), normalize])

    train_dataGen = DataGeneratorSplitting(data=args.data,
                                           dataset=args.dataset,
                                           imgExt=args.imgEXT,
                                           imgTransform=train_data_transform,
                                           phase='train')

    val_dataGen = DataGeneratorSplitting(data=args.data,
                                         dataset=args.dataset,
                                         imgExt=args.imgEXT,
                                         imgTransform=val_data_transform,
                                         phase='val')

    train_data_loader = DataLoader(train_dataGen,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   pin_memory=True)
    val_data_loader = DataLoader(val_dataGen,
                                 batch_size=args.batch_size,
                                 num_workers=args.num_workers,
                                 shuffle=False,
                                 pin_memory=True)
    # Unshuffled view of the training set: used to read labels in a stable
    # order and for memory-bank style evaluation in val().
    trainloader_wo_shuf = DataLoader(train_dataGen,
                                     batch_size=args.batch_size,
                                     num_workers=args.num_workers,
                                     shuffle=False,
                                     pin_memory=True)

    # create model and optimizer
    n_data = len(train_dataGen)

    model = ResNet18_cls(clsNum=len(train_dataGen.sceneList),
                         dim=args.dim).cuda()
    model_ema = ResNet18_cls(clsNum=len(train_dataGen.sceneList),
                             dim=args.dim).cuda()

    # Collect every training label (in the unshuffled loader's order) for the
    # NCA criterion below.
    y_true = []
    for data in tqdm(trainloader_wo_shuf, desc="extracting training labels"):
        label_batch = data['label'].to(torch.device("cpu"))
        y_true += list(np.squeeze(label_batch.numpy()).astype(np.float32))
    y_true = np.asarray(y_true)

    CELoss = torch.nn.CrossEntropyLoss().cuda()

    # copy weights from `model' to `model_ema' (momentum coefficient 0)
    moment_update(model, model_ema, 0)

    criterion = NCACrossEntropy(torch.LongTensor(y_true),
                                args.margin / args.temperature).cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=0.9,
                                weight_decay=1e-4,
                                nesterov=True)

    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)

    train_writer = SummaryWriter(
        os.path.join(logs_dir, 'runs', sv_name, 'training'))
    val_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'val'))

    best_acc = 0
    start_epoch = 0

    # BUGFIX: define the non-parametric memory bank unconditionally so it
    # exists even when --resume names a missing file. The original only
    # created it in the non-resume branch, so a bad resume path left
    # `lemniscate` undefined and train_Moco() crashed with NameError.
    lemniscate = LinearAverage(args.dim, n_data, args.temperature).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch'] + 1
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # the saved memory bank replaces the freshly created one
            lemniscate = checkpoint['lemniscate']
            model_ema.load_state_dict(checkpoint['model_ema'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    for epoch in range(start_epoch, args.epochs):

        print('Epoch {}/{}'.format(epoch, args.epochs - 1))
        print('-' * 10)

        train_Moco(train_data_loader, model, model_ema, lemniscate, criterion,
                   CELoss, optimizer, epoch, train_writer)
        acc = val(val_data_loader, trainloader_wo_shuf, model, epoch,
                  val_writer)

        is_best_acc = acc > best_acc
        best_acc = max(best_acc, acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                # 'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'lemniscate': lemniscate,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best_acc,
            sv_name)

        scheduler.step()
                                 nn.Linear(100, 10))
model.to(device)

# Load Data
# NOTE(review): `device`, `batch_size`, and `model` are defined earlier in
# this script, outside this excerpt.
my_transforms = transforms.Compose(
    [  # Compose chains several transforms into a single callable
        transforms.Resize((36, 36)),  # Resizes (32,32) to (36,36)
        transforms.RandomCrop((32, 32)),  # Takes a random (32,32) crop
        transforms.ColorJitter(brightness=0.5),  # Change brightness of image
        transforms.RandomRotation(
            degrees=45),  # Random rotation between -45 and +45 degrees
        transforms.RandomHorizontalFlip(
            p=0.5),  # Flips the image horizontally with probability 0.5
        transforms.RandomVerticalFlip(
            p=0.05),  # Flips image vertically with probability 0.05
        transforms.RandomGrayscale(
            p=0.2),  # Converts to grayscale with probability 0.2
        transforms.ToTensor(
        ),  # Finally converts PIL image to tensor so we can train w. pytorch
        transforms.Normalize(mean=[0.5, 0.5, 0.5],
                             std=[0.5, 0.5,
                                  0.5]),  # Note: these values aren't optimal
    ])

# downloads CIFAR-10 into ./dataset/ on first run
train_dataset = datasets.CIFAR10(root="dataset/",
                                 train=True,
                                 transform=my_transforms,
                                 download=True)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
예제 #16
0
def main():
    """
        Image Classification Network Trainer

        Parses CLI options, builds a frozen VGG/DenseNet feature extractor
        with a fresh fully-connected classifier head, trains the head, and
        saves a checkpoint containing the model state and class mapping.
    """
    parser = train_args.get_args()
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__ + ' by ' +
                        __author__)
    cli_args = parser.parse_args()

    # check for data directory
    if not os.path.isdir(cli_args.data_directory):
        print(f'Data directory {cli_args.data_directory} was not found.')
        exit(1)

    # check for save directory; create it when missing
    if not os.path.isdir(cli_args.save_dir):
        print(f'Directory {cli_args.save_dir} does not exist. Creating...')
        os.makedirs(cli_args.save_dir)

    # load category-id -> name mapping
    with open(cli_args.categories_json, 'r') as f:
        cat_to_name = json.load(f)

    # the classifier's output size equals the number of categories
    output_size = len(cat_to_name)
    print(f"Images are labeled with {output_size} categories.")

    # prep data loader (ImageNet normalisation statistics)
    expected_means = [0.485, 0.456, 0.406]
    expected_std = [0.229, 0.224, 0.225]
    max_image_size = 224
    batch_size = 32

    training_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.25),
        transforms.RandomRotation(25),
        transforms.RandomGrayscale(p=0.02),
        transforms.RandomResizedCrop(max_image_size),
        transforms.ToTensor(),
        transforms.Normalize(expected_means, expected_std)
    ])

    training_dataset = datasets.ImageFolder(cli_args.data_directory,
                                            transform=training_transforms)

    training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True)

    # Make model — only VGG and DenseNet backbones are supported
    if not cli_args.arch.startswith("vgg") and not cli_args.arch.startswith(
            "densenet"):
        print("Only supporting VGG and DenseNet")
        exit(1)

    print(f"Using a pre-trained {cli_args.arch} network.")
    nn_model = models.__dict__[cli_args.arch](pretrained=True)

    # classifier input sizes for the DenseNet variants
    densenet_input = {
        'densenet121': 1024,
        'densenet169': 1664,
        'densenet161': 2208,
        'densenet201': 1920
    }

    input_size = 0

    # Input size from current classifier if VGG
    if cli_args.arch.startswith("vgg"):
        input_size = nn_model.classifier[0].in_features

    if cli_args.arch.startswith("densenet"):
        input_size = densenet_input[cli_args.arch]

    # Prevent back propagation on the pre-trained feature extractor
    for param in nn_model.parameters():
        param.requires_grad = False

    od = OrderedDict()
    hidden_sizes = cli_args.hidden_units

    hidden_sizes.insert(0, input_size)

    print(
        f"Building a {len(cli_args.hidden_units)} hidden layer classifier with inputs {cli_args.hidden_units}"
    )

    # Linear -> ReLU -> Dropout for each hidden layer
    for layer_idx in range(len(hidden_sizes) - 1):
        od['fc' + str(layer_idx + 1)] = nn.Linear(hidden_sizes[layer_idx],
                                                  hidden_sizes[layer_idx + 1])
        od['relu' + str(layer_idx + 1)] = nn.ReLU()
        od['dropout' + str(layer_idx + 1)] = nn.Dropout(p=0.15)

    # BUGFIX: use hidden_sizes[-1] rather than hidden_sizes[i + 1]; the
    # original raised NameError when no hidden units were given, because
    # the loop above never ran and `i` was unbound.
    od['output'] = nn.Linear(hidden_sizes[-1], output_size)
    od['softmax'] = nn.LogSoftmax(dim=1)

    classifier = nn.Sequential(od)

    # Replace classifier head on the frozen backbone
    nn_model.classifier = classifier

    # Start clean by setting gradients of all parameters to zero.
    nn_model.zero_grad()

    # The negative log likelihood loss as criterion (pairs with LogSoftmax).
    criterion = nn.NLLLoss()

    # Adam: A Method for Stochastic Optimization
    # https://arxiv.org/abs/1412.6980
    # Only the new classifier head's parameters are optimized.
    print(f"Setting optimizer learning rate to {cli_args.learning_rate}.")
    optimizer = optim.Adam(nn_model.classifier.parameters(),
                           lr=cli_args.learning_rate)

    # Default to CPU; switch to GPU only when requested AND available.
    # (The original printed "GPU is not available" even when the user
    # never asked for the GPU.)
    device = torch.device("cpu")
    if cli_args.use_gpu:
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
        else:
            print("GPU is not available. Using CPU.")

    print(f"Sending model to device {device}.")
    nn_model = nn_model.to(device)

    data_set_len = len(training_dataloader.batch_sampler)

    chk_every = 50

    print(f'Using the {device} device to train.')
    print(
        f'Training on {data_set_len} batches of {training_dataloader.batch_size}.'
    )
    print(
        f'Displaying average loss and accuracy for epoch every {chk_every} batches.'
    )

    for e in range(cli_args.epochs):
        e_loss = 0
        prev_chk = 0
        total = 0
        correct = 0
        print(
            f'\nEpoch {e+1} of {cli_args.epochs}\n----------------------------'
        )
        for ii, (images, labels) in enumerate(training_dataloader):
            # Move batch to the preferred device
            images = images.to(device)
            labels = labels.to(device)

            # Set gradients of all parameters to zero.
            optimizer.zero_grad()

            # Forward + backward + update (call the model, not .forward(),
            # so nn.Module hooks run).
            outputs = nn_model(images)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Running loss total for this epoch
            e_loss += loss.item()

            # Running accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Periodic progress report
            itr = (ii + 1)
            if itr % chk_every == 0:
                avg_loss = f'avg. loss: {e_loss/itr:.4f}'
                acc = f'accuracy: {(correct/total) * 100:.2f}%'
                print(
                    f'  Batches {prev_chk:03} to {itr:03}: {avg_loss}, {acc}.')
                prev_chk = (ii + 1)

    print('Done... Saving')

    # Persist everything needed to rebuild the model for inference.
    nn_model.class_to_idx = training_dataset.class_to_idx
    model_state = {
        'epoch': cli_args.epochs,
        'state_dict': nn_model.state_dict(),
        'optimizer_dict': optimizer.state_dict(),
        'classifier': nn_model.classifier,
        'class_to_idx': nn_model.class_to_idx,
        'arch': cli_args.arch
    }

    save_location = f'{cli_args.save_dir}/{cli_args.save_name}.pth'
    print(f"Saving checkpoint to {save_location}")

    torch.save(model_state, save_location)
예제 #17
0
def main():
    """Transfer-learning trainer for Flower102 (optionally mixed with SUN397).

    Parses CLI options, builds the combined training set and the Flower102
    validation set, constructs the model according to the --pretrained mode
    (Imagenet / MoCo / Transfer / Resume / random init), then runs the
    train/validate loop, saving per-epoch records and the best checkpoint.
    """
    parser = argparse.ArgumentParser(description='Image Classification.')
    parser.add_argument('--model-name', type=str, default='resnet50')
    parser.add_argument(
        '--checkpoint-path',
        type=str,
        default='./tl/combine/sun_flow_combine_tl',
        help=
        'Path to save checkpoint, only the model with highest top1 acc will be saved,'
        'And the records will also be writen in the folder')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='Batch size')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='Initial learning rate')
    parser.add_argument('--epoch',
                        type=int,
                        default=100,
                        help='Maximum training epoch')
    parser.add_argument('--start-epoch',
                        type=int,
                        default=0,
                        help='Start training epoch')

    parser.add_argument('--root-dir',
                        type=str,
                        default='../data/',
                        help='path to the image folder')
    parser.add_argument('--SUN-dir',
                        type=str,
                        default='./SUN397',
                        help='path to dataset')
    parser.add_argument('--flower-dir',
                        type=str,
                        default='./tl-ssl/data/102Flower',
                        help='path to the image folder')
    parser.add_argument(
        '--train-json',
        type=str,
        default='../data/train2018.json',
        help='path to the train folder, each class has a single folder')
    parser.add_argument(
        '--val-json',
        type=str,
        default='../data/val2018.json',
        help='path to the validation folder, each class has a single folder')
    # parser.add_argument('--test-dir', type=str, default='xxx/test',
    #                     help='path to the train folder, each class has a single folder')
    parser.add_argument('--cos',
                        type=bool,
                        default=False,
                        help='Use cos learning rate sheduler')
    parser.add_argument('--schedule',
                        default=[50, 100, 150],
                        nargs='*',
                        type=int,
                        help='learning rate schedule (when to drop lr by 10x)')
    parser.add_argument('--ratio', type=float, default=0.1)
    parser.add_argument(
        '--pretrained',
        type=str,
        default='Resume',
        help='Load which pretrained model, '
        'None : Do not load any weight, random initialize'
        'Imagenet : official Imagenet pretrained model,'
        'MoCo : Transfer model from Moco, path in $transfer-resume$'
        'Transfer : Transfer model from Supervised pretrained, path in $transfer-resume$'
        'Resume : Load checkpoint for corrupted training process, path in $resume$'
    )
    parser.add_argument('--transfer-resume',
                        type=str,
                        default='',
                        help='Path to load transfering pretrained model')
    parser.add_argument('--resume',
                        type=str,
                        default='',
                        help='Path to resume a checkpoint')
    parser.add_argument('--num-class',
                        type=int,
                        default=499,
                        help='Number of class for the classification')
    parser.add_argument('--PRINT-INTERVAL',
                        type=int,
                        default=20,
                        help='Number of batch to print the loss')
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device {}".format(device))

    # Create checkpoint folder when missing
    if not os.path.exists(args.checkpoint_path):
        os.makedirs(args.checkpoint_path)

    # ImageNet normalisation statistics
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    test_trans = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(), normalize
    ])

    # MoCo v2-style augmentation stack for training
    augmentation = [
        transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
        transforms.RandomApply(
            [
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
            ],
            p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])], p=0.5),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ]
    flower = Flower102(root_dir=args.flower_dir, mode='train')

    SUNdataset = SUNDataset(root=args.SUN_dir, number_per_class=1000)

    # Train on the union of Flower102 and SUN397 images
    trainset = CombineDataset(images1=flower.images,
                              images2=SUNdataset.images,
                              labels1=flower.labels,
                              labels2=SUNdataset.labels,
                              transforms=transforms.Compose(augmentation))

    # Validation is Flower102 only
    valset = Flower102(root_dir=args.flower_dir,
                       mode='val',
                       transforms=test_trans)

    train_loader = DataLoader(trainset,
                              batch_size=args.batch_size,
                              num_workers=8,
                              shuffle=True,
                              pin_memory=True)
    val_loader = DataLoader(valset,
                            batch_size=128,
                            num_workers=8,
                            pin_memory=True,
                            shuffle=True)
    # test_loader = DataLoader(testset,batch_size=args.batch_size)

    LOSS_FUNC = nn.CrossEntropyLoss().to(device)
    print(args.model_name)

    if args.pretrained == 'Imagenet':
        # ImageNet supervised pretrained model
        print('ImageNet supervised pretrained model')
        model = MODEL_DICT[args.model_name](num_classes=args.num_class,
                                            pretrained=True)
    elif args.pretrained == 'MoCo':
        # load weight from transfering model from moco
        print('Load weight from transfering model from moco')
        model = MODEL_DICT[args.model_name](num_classes=args.num_class,
                                            pretrained=False)
        if args.transfer_resume:
            if os.path.isfile(args.transfer_resume):
                print("=> loading checkpoint '{}'".format(
                    args.transfer_resume))
                # BUGFIX: load the checkpoint at --transfer-resume. The
                # original passed args.pretrained (the literal mode string
                # 'MoCo') to torch.load, which always failed.
                checkpoint = torch.load(args.transfer_resume,
                                        map_location="cpu")

                # rename moco pre-trained keys
                state_dict = checkpoint['state_dict']
                for k in list(state_dict.keys()):
                    # retain only encoder_q up to before the embedding layer
                    if k.startswith('module.encoder_q') and not k.startswith(
                            'module.encoder_q.fc'):
                        # remove prefix
                        state_dict[
                            k[len("module.encoder_q."):]] = state_dict[k]
                    # delete renamed or unused k
                    del state_dict[k]

                msg = model.load_state_dict(state_dict, strict=False)
                # only the freshly initialised fc layer may be missing
                assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}

                print("=> loaded pre-trained model '{}'".format(
                    args.transfer_resume))
            else:
                print("=> no checkpoint found at '{}'".format(
                    args.transfer_resume))

        # freeze all layers but the last fc
        for name, param in model.named_parameters():
            if name not in ['fc.weight', 'fc.bias']:
                param.requires_grad = False
        # init the fc layer
        model.fc.weight.data.normal_(mean=0.0, std=0.01)
        model.fc.bias.data.zero_()

    elif args.pretrained == 'Transfer':
        # load weight from transfering model from supervised pretraining
        print('Load weight from transfering model from supervised pretraining')
        model = MODEL_DICT[args.model_name](num_classes=args.num_class,
                                            pretrained=False)
        if args.transfer_resume:
            if os.path.isfile(args.transfer_resume):
                print("=> loading checkpoint '{}'".format(
                    args.transfer_resume))

                checkpoint = torch.load(args.transfer_resume)
                msg = model.load_state_dict(checkpoint, strict=False)
                assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    args.transfer_resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(
                    args.transfer_resume))

        # freeze all layers but the last fc
        for name, param in model.named_parameters():
            if name not in ['fc.weight', 'fc.bias']:
                param.requires_grad = False
        # init the fc layer
        model.fc.weight.data.normal_(mean=0.0, std=0.01)
        model.fc.bias.data.zero_()
    else:
        # Random Initialize
        print('Random Initialize')
        model = MODEL_DICT[args.model_name](num_classes=args.num_class,
                                            pretrained=False)

    # Dataparallel for multiple GPU usage
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    # Optimizer and learning rate scheduler

    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)

    if args.pretrained == 'Resume':
        # load weight from checkpoint
        print('Load weight from checkpoint {}'.format(args.resume))
        load_resume(args, model, optimizer, args.resume)

    metric = []

    for epoch in range(args.start_epoch, args.epoch):
        adjust_learning_rate(optimizer, epoch, args)
        train_loss = train(model, train_loader, optimizer, args.PRINT_INTERVAL,
                           epoch, args, LOSS_FUNC, device)
        acc1, acc5, confusion_matrix, val_loss, aucs = test(
            model, val_loader, args.num_class, LOSS_FUNC, device)
        metric.append(acc1)

        # Save train/val loss, acc1, acc5, confusion matrix(F1, recall, precision), AUCs
        record = {
            'epoch': epoch + 1,
            'train loss': train_loss,
            'val loss': val_loss,
            'acc1': acc1,
            'acc5': acc5,
            'confusion matrix': confusion_matrix,
            'AUCs': aucs
        }
        torch.save(
            record,
            os.path.join(args.checkpoint_path,
                         'recordEpoch{}.pth.tar'.format(epoch)))
        # Only save the model with highest top1 acc
        if np.max(metric) == acc1:
            checkpoint = {
                'epoch': epoch + 1,
                'arch': args.model_name,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(checkpoint,
                       os.path.join(args.checkpoint_path, 'best.pth.tar'))
            print("Model Saved")
예제 #18
0
def main():
    """Instance-discrimination training on CIFAR-10 (NCE / linear average).

    Builds the model (optionally distributed), the CIFAR10Instance datasets,
    the lemniscate memory bank and criterion, then alternates training with
    periodic kNN evaluation, checkpointing the best precision.

    Relies on module-level globals: ``args`` (populated here from ``parser``)
    and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        if not args.cifar:
            model = models.__dict__[args.arch](low_dim=args.low_dim)
        else:
            model = models.resnet_cifar.__dict__[args.arch](
                low_dim=args.low_dim)

    if not args.distributed:
        # AlexNet/VGG: parallelise only the conv features (large classifiers)
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Data loading: CIFAR-10 with instance-discrimination augmentation
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        transforms.RandomGrayscale(p=0.2),
        #transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    train_dataset = datasets.CIFAR10Instance(root=args.data,
                                             train=True,
                                             download=True,
                                             transform=transform_train)

    # BUGFIX: the epoch loop below calls train_sampler.set_epoch() in
    # distributed mode, but the original never defined train_sampler
    # (only a commented-out block did), so distributed runs crashed with
    # NameError. Define it here and wire it into the train loader.
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=250,
        shuffle=(train_sampler is None),
        num_workers=4,
        pin_memory=True,
        sampler=train_sampler)

    val_dataset = datasets.CIFAR10Instance(root=args.data,
                                           train=False,
                                           download=True,
                                           transform=transform_test)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=250,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)

    # define lemniscate (non-parametric memory bank) and criterion
    ndata = len(train_dataset)
    if args.nce_k > 0:
        lemniscate = NCEAverage(args.low_dim, ndata, args.nce_k, args.nce_t,
                                args.nce_m)
        criterion = NCECriterion(ndata).cuda()
    else:
        lemniscate = LinearAverage(args.low_dim, ndata, args.nce_t, args.nce_m)
        criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            lemniscate = checkpoint['lemniscate']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # evaluation-only mode: run kNN once and exit
    if args.evaluate:
        kNN(0, model, lemniscate, train_loader, val_loader, 200, args.nce_t)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # re-seed the sampler so each epoch sees a new shard order
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        if epoch % 10 == 0:
            # remember best prec@1 and save checkpoint
            prec1 = kNN(0, model, lemniscate, train_loader, val_loader, 200,
                        args.nce_t)
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'lemniscate': lemniscate,
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
            # evaluate KNN after last epoch

        # train for one epoch
        train(train_loader, model, lemniscate, criterion, optimizer, epoch)
예제 #19
0
def main():
    """Train a ResNet-18 embedding model with a margin NormSoftmax loss.

    Relies on module-level globals (``args``, ``sv_name``, ``logs_dir``,
    ``checkpoint_dir``) that must be initialized before this function runs
    (e.g. by CLI parsing at script level).  Builds four data generators
    ({train,test} split x {augmented,plain} transform), optionally resumes
    from ``args.resume``, then runs the train/val loop, checkpointing and
    stepping the LR scheduler once per epoch.
    """
    global args, sv_name, logs_dir, checkpoint_dir

    # Persist the full CLI configuration next to the run artifacts.
    write_arguments_to_file(
        args, os.path.join('./', sv_name, sv_name + '_arguments.txt'))

    use_cuda = torch.cuda.is_available()

    if use_cuda:
        torch.backends.cudnn.enabled = True
        # Autotune conv kernels -- beneficial because input size is fixed.
        cudnn.benchmark = True

    # ImageNet channel statistics -- presumably the backbone expects
    # ImageNet-style normalization; confirm against the model definition.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Training-time augmentation: resize, random grayscale/jitter/flip.
    train_data_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomGrayscale(p=0.2),
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])

    # Deterministic transform for evaluation.
    test_data_transform = transforms.Compose(
        [transforms.Resize((256, 256)),
         transforms.ToTensor(), normalize])

    # Train split with training augmentation (the main training stream).
    train_dataGen = DataGeneratorSM(data=args.data,
                                    dataset=args.dataset,
                                    train_per=args.train_per,
                                    imgExt=args.imgEXT,
                                    imgTransform=train_data_transform,
                                    phase='train')

    # Test split but with *training* augmentation -- consumed by train()
    # below.  NOTE(review): augmenting the test split looks deliberate
    # here (it feeds the training routine), but confirm with train().
    train_test_dataGen = DataGeneratorSM(data=args.data,
                                         dataset=args.dataset,
                                         train_per=args.train_per,
                                         imgExt=args.imgEXT,
                                         imgTransform=train_data_transform,
                                         phase='test')

    # Train split with the deterministic eval transform (for validation).
    train_dataGen_ = DataGeneratorSM(data=args.data,
                                     dataset=args.dataset,
                                     train_per=args.train_per,
                                     imgExt=args.imgEXT,
                                     imgTransform=test_data_transform,
                                     phase='train')

    # Test split with the deterministic eval transform.
    test_dataGen = DataGeneratorSM(data=args.data,
                                   dataset=args.dataset,
                                   train_per=args.train_per,
                                   imgExt=args.imgEXT,
                                   imgTransform=test_data_transform,
                                   phase='test')

    train_data_loader = DataLoader(train_dataGen,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   pin_memory=True)
    train_test_data_loader = DataLoader(train_test_dataGen,
                                        batch_size=args.batch_size,
                                        num_workers=args.num_workers,
                                        shuffle=True,
                                        pin_memory=True)
    # Evaluation loaders keep order fixed (shuffle=False).
    test_data_loader = DataLoader(test_dataGen,
                                  batch_size=args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=False,
                                  pin_memory=True)
    trainloader_wo_shuf = DataLoader(train_dataGen_,
                                     batch_size=args.batch_size,
                                     num_workers=args.num_workers,
                                     shuffle=False,
                                     pin_memory=True)

    # Optionally attach an MLP projection head on top of the backbone.
    if args.MLP:
        model = ResNet18_MLP(dim=args.dim).cuda()
    else:
        model = ResNet18(dim=args.dim).cuda()

    # The loss holds learnable class prototypes, hence its parameters are
    # optimized jointly with the model below.
    loss_fn = NormSoftmaxLoss_Margin(args.dim,
                                     len(train_dataGen.sceneList),
                                     margin=args.margin,
                                     temperature=args.temperature).cuda()

    optimizer = torch.optim.SGD(list(model.parameters()) +
                                list(loss_fn.parameters()),
                                args.lr,
                                momentum=0.9,
                                weight_decay=1e-4,
                                nesterov=True)

    # Halve the LR every 30 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)

    best_acc = 0
    start_epoch = 0

    # Optionally resume model, loss prototypes and optimizer state.
    # NOTE(review): the scheduler state is not saved/restored, so on resume
    # the LR schedule restarts from its initial phase -- confirm intended.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['model_state_dict'])
            loss_fn.load_state_dict(checkpoint['loss_state_dict'])
            # lemniscate = checkpoint['lemniscate']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Separate TensorBoard writers for training and validation curves.
    train_writer = SummaryWriter(
        os.path.join(logs_dir, 'runs', sv_name, 'training'))
    val_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'val'))

    for epoch in range(start_epoch, args.epochs):

        print('Epoch {}/{}'.format(epoch, args.epochs - 1))
        print('-' * 10)

        train(train_data_loader, train_test_data_loader, model, optimizer,
              loss_fn, epoch, train_writer)
        acc = val(test_data_loader, trainloader_wo_shuf, model, epoch,
                  val_writer)

        is_best_acc = acc > best_acc
        best_acc = max(best_acc, acc)

        # Checkpoint every epoch; save_checkpoint additionally marks the
        # best-so-far snapshot via is_best_acc.
        save_checkpoint(
            {
                'epoch': epoch + 1,
                # 'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'loss_state_dict': loss_fn.state_dict(),
                # 'lemniscate': lemniscate,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best_acc,
            sv_name)

        scheduler.step()
def main_worker(gpu, ngpus_per_node, args):
    """Per-process worker for (optionally distributed) Sim2Sem pretraining.

    Initializes the process group when distributed, builds the Sim2Sem
    model wrapped in DistributedDataParallel, optionally resumes from the
    rolling ``checkpoint_last.pth.tar``, and runs the MoCo-style
    pretraining loop over STL-10 with periodic checkpointing.

    Args:
        gpu: GPU index assigned to this process (or None).
        ngpus_per_node: number of GPUs per node, used to derive the global
            rank and to split batch size / worker count per process.
        args: parsed CLI namespace (mutated in place: gpu, rank,
            batch_size, workers, resume, start_epoch_pre).
    """
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    model = sim2sem.builder.Sim2Sem(ResConvEncoder,
                                    args.fea_dim,
                                    args.num_clusters,
                                    args.k,
                                    args.m,
                                    args.t,
                                    args.sr,
                                    freeze_encoder_s=True)
    print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        # raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    optimizer_pre = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    # NOTE(review): any user-supplied --resume value is overwritten here;
    # resume always targets the rolling "last" checkpoint in save_folder.
    args.resume = '{}/checkpoint_last.pth.tar'.format(args.save_folder)
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch_pre = checkpoint['epoch_pre']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer_pre.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch_pre']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # ImageNet channel statistics (standard for MoCo-style pipelines).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([sim2sem.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    # TwoCropsTransform yields two independently-augmented views per image
    # (query/key pair for contrastive pretraining).
    train_sim = STL10(args.data_folder,
                      split="train+test+unlabeled",
                      transform=sim2sem.loader.TwoCropsTransform(
                          transforms.Compose(augmentation)))

    if args.distributed:
        train_sampler_sim = torch.utils.data.distributed.DistributedSampler(
            train_sim)
    else:
        train_sampler_sim = None

    train_loader_sim = torch.utils.data.DataLoader(
        train_sim,
        batch_size=args.prebatch_size,
        shuffle=(train_sampler_sim is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler_sim,
        drop_last=True)

    # Pretraining
    for epoch_pre in range(args.start_epoch_pre, args.epochs_pre):
        if args.distributed:
            # Reseed the sampler so each epoch sees a different shard order.
            train_sampler_sim.set_epoch(epoch_pre)
        adjust_learning_rate_pre(optimizer_pre, epoch_pre, args)

        # train for one epoch
        train_pre(train_loader_sim, model, criterion, optimizer_pre, epoch_pre,
                  args)

        # Only rank 0 (or the single process) checkpoints, and only every
        # save_freq epochs when multiprocessing-distributed.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0 and
            (epoch_pre + 1) % args.save_freq == 0):
            # Numbered snapshot for this epoch.
            save_checkpoint(
                {
                    'epoch_pre': epoch_pre + 1,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer_pre.state_dict(),
                },
                is_best=False,
                filename='{}/checkpoint_pre_{:04d}.pth.tar'.format(
                    args.save_folder, epoch_pre))

            # Rolling "last" checkpoint, used by the resume logic above.
            save_checkpoint(
                {
                    'epoch_pre': epoch_pre + 1,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer_pre.state_dict(),
                },
                is_best=False,
                filename='{}/checkpoint_last.pth.tar'.format(args.save_folder))

            # Final checkpoint after the very last pretraining epoch.
            if (epoch_pre + 1) == args.epochs_pre:
                save_checkpoint(
                    {
                        'epoch_pre': epoch_pre + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer_pre.state_dict(),
                    },
                    is_best=False,
                    filename='{}/checkpoint_final.pth.tar'.format(
                        args.save_folder))
예제 #21
0
파일: main.py 프로젝트: Howardqlz/Meta-MTL
    parser.add_argument('--cifar100', action='store_true')
    parser.add_argument('--cifar10', action='store_true')
    parser.add_argument('--miniimagenet', action='store_true')
    parser.add_argument('--half', action='store_true')
    parser.add_argument('--fine_labels', action='store_true')  # only valid when using cifar100
    parser.add_argument('--random_kmeans', action='store_true')
    args = parser.parse_args()

    n_tasks = args.num_tasks

    # miniimagenet
    transform_mini_train = transforms.Compose(
        [
            transforms.Resize(64),
            transforms.RandomHorizontalFlip(),
            transforms.RandomGrayscale(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    transform_mini_test = transforms.Compose(
        [
            transforms.Resize(64),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    # cifar100
    transform_cifar100_train = transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.RandomGrayscale(),
            transforms.ToTensor(),
예제 #22
0
def main():
    """Train an EfficientNet-B0 scene classifier.

    Parses CLI arguments, builds either a 90/10 train/val split or an
    all-data training set, then runs the epoch loop with a warm-up +
    cosine learning-rate schedule, saving the best and latest weights
    under ``--output_dir``.
    """
    parser = argparse.ArgumentParser(
        description='Scene Classification Training')
    parser.add_argument('--device',
                        default='cuda:0',
                        type=str,
                        required=False,
                        help='GPU ids')
    parser.add_argument('--epoch',
                        default=350,
                        type=int,
                        required=True,
                        help='training epochs')
    parser.add_argument('--alldata',
                        dest='alldata',
                        action='store_true',
                        help='use alldata to train')
    parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
    parser.add_argument('--weight_decay',
                        '-w',
                        default=5e-4,
                        type=float,
                        help='weight_decay')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='training batch size')
    parser.add_argument('--output_dir', default='./checkpoint', type=str)
    parser.add_argument('--warm_up_epochs', default=10, type=int)
    parser.add_argument('--log_file', type=str, default='./log/default.log')
    parser.add_argument('--params', type=str, default=None)
    args = parser.parse_args()

    log_file_name = args.log_file
    logger.add(log_file_name)
    logger.info('args:\n' + args.__repr__())

    batch_size = args.batch_size
    output_dir = args.output_dir
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(output_dir, exist_ok=True)

    device = args.device

    # Data
    logger.info('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.5,
                               contrast=0.5,
                               saturation=0.5,
                               hue=0.5),
        transforms.RandomGrayscale(p=0.1),
        transforms.ToTensor(),
    ])

    transform_test = transforms.Compose([
        transforms.ToPILImage(),  # dataset yields arrays, not PIL images
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor()
    ])
    train_dir = os.path.join(DATA_PATH, 'train/train')
    if not args.alldata:
        # Two dataset views over the same files: augmented for training,
        # deterministic for validation.
        whole_set = SceneDataset(
            annotations_csv='/home/linhw/myproject/data_scene/train_labels.csv',
            root_dir=train_dir,
            transform=transform_train)

        whole_set2 = SceneDataset(
            annotations_csv='/home/linhw/myproject/data_scene/train_labels.csv',
            root_dir=train_dir,
            transform=transform_test)
        whole_len = len(whole_set)
        train_len = int(whole_len * 0.9)  # 90/10 train/val split
        indices = random.sample(range(0, whole_len), train_len)
        # Complement of the training indices; set membership keeps this
        # O(n) instead of the O(n^2) list scan.
        train_index_set = set(indices)
        indices2 = [x for x in range(whole_len) if x not in train_index_set]

        trainset = torch.utils.data.Subset(whole_set, indices)
        testset = torch.utils.data.Subset(whole_set2, indices2)
    else:
        trainset = SceneDataset(
            annotations_csv='/home/linhw/myproject/data_scene/train_labels.csv',
            root_dir=train_dir,
            transform=transform_train)
        testset = torch.utils.data.Subset(
            SceneDataset(annotations_csv=
                         '/home/linhw/myproject/data_scene/train_labels.csv',
                         root_dir=train_dir,
                         transform=transform_test), range(8000)
        )  # when whole data is True, randomly choose 8000 training samples. Test acc here is training acc.

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=8)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8)

    # Model
    logger.info('==> Building model..')
    model = EfficientNet.from_name('efficientnet-b0', num_classes=100)
    model = model.to(device)
    if args.params is not None:
        model.load_state_dict(torch.load(args.params, map_location=device))

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=args.weight_decay)
    #optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # Linear warm-up for the first warm_up_epochs, then cosine decay to 0.
    warm_up_with_cosine_lr = lambda epoch: epoch / args.warm_up_epochs if epoch <= args.warm_up_epochs else 0.5 * (
        math.cos((epoch - args.warm_up_epochs) /
                 (args.epoch - args.warm_up_epochs) * math.pi) + 1)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=warm_up_with_cosine_lr)

    best_acc = 0  # best test accuracy so far
    for epoch in range(args.epoch):
        logger.info("Epoch {} started".format(epoch))

        train_acc, training_loss = train(model, optimizer, trainloader, device)
        logger.info(
            "train acc = {:.4f}, training loss = {:.4f} lr = {:.4f}".format(
                train_acc, training_loss, warm_up_with_cosine_lr(epoch)))

        test_acc, test_loss = test(model, testloader, device)
        logger.info("test acc = {:.4f}, test loss = {:.4f}".format(
            test_acc, test_loss))

        if test_acc > best_acc:
            best_acc = test_acc
            logger.info("best acc improved to {:.4f}".format(best_acc))
            model_to_save = model.module if hasattr(model, 'module') else model
            # NOTE(review): 'bset_model.pt' is a typo for 'best_model.pt';
            # kept as-is so downstream tooling that loads it keeps working.
            torch.save(model_to_save.state_dict(),
                       '{}/bset_model.pt'.format(output_dir))
            logger.info("model saved to {}/bset_model.pt".format(output_dir))

        model_to_save = model.module if hasattr(model, 'module') else model
        torch.save(model_to_save.state_dict(),
                   '{}/last_model.pt'.format(output_dir))
        logger.info("model saved to {}/last_model.pt".format(output_dir))

        # NOTE(review): model.pt duplicates last_model.pt each epoch; kept
        # because external scripts may expect either filename.
        model_to_save = model.module if hasattr(model, 'module') else model
        torch.save(model_to_save.state_dict(),
                   '{}/model.pt'.format(output_dir))
        logger.info("model saved to {}/model.pt".format(output_dir))
        scheduler.step()

        logger.info("Epoch {} ended, best acc = {:.4f}".format(
            epoch, best_acc))

    logger.info("Training finished, best_acc = {:.4f}".format(best_acc))
예제 #23
0
def main(env, policy_net, target_net, action_space):
    """DQN training loop over ``env`` with experience replay.

    Args:
        env: Gym-style environment exposing ``reset()`` and ``step()``.
        policy_net: online Q-network being optimized.
        target_net: target Q-network, synced every TARGET_UPDATE episodes.
        action_space: number of discrete actions.
    """
    Transition = namedtuple(
        "Transition", ('state', 'action', 'reward', 'next_state', "done"))
    rm = experience_replay()
    opt = torch.optim.RMSprop(policy_net.parameters(), lr=LR)

    # 84x84 grayscale frames (standard Atari-style preprocessing).
    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomGrayscale(p=1),
        transforms.Resize((84, 84)),
        transforms.ToTensor()
    ])

    cnt = 0
    episodes_duration = []
    rewards_episode = []

    policy_net = policy_net.to(DEV)
    target_net = target_net.to(DEV)

    for episode in range(EPISODES):
        state = env.reset()  # raw image observation
        state = preprocess(state).unsqueeze(0).to(DEV)
        reward_episode = 0
        for i in count():
            probs = policy_net(state)
            # Epsilon-greedy action selection.
            if eps_decay(cnt) > random.random():
                action = torch.tensor([random.randrange(0, action_space)])
            else:
                action = torch.argmax(probs).view(1)

            next_state, reward, done, info = env.step(action.item())
            reward_episode += reward
            reward = torch.tensor([reward])
            next_state = preprocess(next_state).unsqueeze(0)
            cnt += 1
            rm.add_to_memory(
                (state.to("cpu"), action.to("cpu"), reward, next_state, done))

            batch = rm.get_batch_for_replay(BATCH_SIZE)
            if batch is None: continue
            batch = Transition(*zip(*batch))

            # BUGFIX: `next_state` is never None in this loop, so the old
            # `s is not None` mask was always all-True and the target
            # bootstrapped through terminal states.  Mask on the stored
            # `done` flag instead: terminal transitions use reward only.
            non_final_mask = torch.tensor(tuple(not d for d in batch.done),
                                          device=DEV)

            state_batch = torch.cat(batch.state).to(DEV)
            action_batch = torch.cat(batch.action).to(DEV)
            reward_batch = torch.cat(batch.reward).to(DEV)

            # Q(s, a) for the actions actually taken.  (The redundant extra
            # forward pass the original did here has been removed.)
            state_action_value = policy_net(state_batch).gather(
                1, action_batch.reshape(-1, 1))

            next_states_values = torch.zeros(BATCH_SIZE, device=DEV)
            if non_final_mask.any():
                non_final_next_states = torch.cat([
                    s for s, d in zip(batch.next_state, batch.done) if not d
                ]).to(DEV)
                next_states_values[non_final_mask] = target_net(
                    non_final_next_states).max(1)[0].detach()

            # Bellman target: r + gamma * max_a' Q_target(s', a').
            y = (next_states_values * GAMMA) + reward_batch

            loss = F.smooth_l1_loss(state_action_value.squeeze(), y)

            opt.zero_grad()
            loss.backward()
            # Element-wise gradient clipping to [-1, 1] for stability.
            for param in policy_net.parameters():
                param.grad.data.clamp_(-1, 1)

            opt.step()
            if done:
                episodes_duration.append(i + 1)
                rewards_episode.append(reward_episode)
                print(
                    f"Episode: {episode} || step: {cnt} || reward episode: {reward_episode}"
                )
                break

        # Periodically sync the target network with the online network.
        if episode % TARGET_UPDATE == 0:
            target_net.load_state_dict(policy_net.state_dict())
예제 #24
0
    def _get_data(self):
        """Load images/labels from per-domain HDF5 files and assemble tensors.

        Reads ``<root><domain>_<split>.hdf5`` for every domain in
        ``self.list_train_domains``, applies the phase-appropriate
        transform, concatenates all domains, shuffles, and one-hot encodes
        labels and domain ids.

        Returns:
            Tuple of (train_imgs, train_labels, train_domains,
            train_indices): image tensor (B, C, H, W), one-hot label
            tensor, one-hot domain tensor, and per-domain sample indices.

        Raises:
            ValueError: if ``self.data_case`` is not train/val/test.
        """
        data_dir = self.root

        # Per-domain accumulators, concatenated after the loop.
        training_list_img = []
        training_list_labels = []
        training_list_idx = []
        training_list_size = []
        training_out_classes = []

        # Augment only during training; evaluation uses a deterministic
        # resize + ImageNet normalization.  (The original also created
        # unused ToPILImage/ToTensor locals here; removed as dead code.)
        if self.data_case == "train":
            to_tensor = transforms.Compose([
                transforms.Resize((self.args.img_w, self.args.img_h)),
                transforms.RandomResizedCrop(self.args.img_w,
                                             scale=(0.7, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ColorJitter(0.3, 0.3, 0.3, 0.3),
                transforms.RandomGrayscale(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225]),
            ])
        else:
            to_tensor = transforms.Compose([
                transforms.Resize((self.args.img_w, self.args.img_h)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225]),
            ])

        for domain in self.list_train_domains:
            if self.data_case == "train":
                domain_data = h5py.File(data_dir + domain + "_" + "train.hdf5",
                                        "r")
            elif self.data_case == "val":
                domain_data = h5py.File(data_dir + domain + "_" + "val.hdf5",
                                        "r")
            elif self.data_case == "test":
                domain_data = h5py.File(data_dir + domain + "_" + "test.hdf5",
                                        "r")
            else:
                # Fail loudly instead of hitting an UnboundLocalError below.
                raise ValueError(
                    "unknown data_case: {!r}".format(self.data_case))

            pacs_imgs = domain_data.get("images")
            # Convert labels in the range(1,7) into (0,6)
            pacs_labels = np.array(domain_data.get("labels")) - 1
            pacs_idx = []
            print(
                "Image: ",
                pacs_imgs.shape,
                " Labels: ",
                pacs_labels.shape,
                " Out Classes: ",
                len(np.unique(pacs_labels)),
            )

            # Transform every raw image into a (C, W, H) float tensor.
            pacs_img_trans = torch.zeros((pacs_imgs.shape[0], self.args.img_c,
                                          self.args.img_w, self.args.img_h))
            for i in range(len(pacs_imgs)):
                curr_img = Image.fromarray(
                    pacs_imgs[i, :, :, :].astype("uint8"), "RGB")

                pacs_img_trans[i, :, :, :] = to_tensor(curr_img)
                pacs_idx.append(i)

            print("Source Domain ", domain)
            training_list_img.append(pacs_img_trans)
            training_list_labels.append(torch.tensor(pacs_labels).long())
            training_list_idx.append(pacs_idx)
            training_list_size.append(len(pacs_imgs))
            training_out_classes.append(len(np.unique(pacs_labels)))

        if self.data_case == "train":
            # For each class, track the largest per-domain class size; the
            # sum defines the "base domain" size used elsewhere.
            num_classes = 7
            for y_c in range(num_classes):
                base_class_size = 0
                base_class_idx = -1
                for d_idx, domain in enumerate(self.list_train_domains):
                    class_idx = training_list_labels[d_idx] == y_c
                    curr_class_size = training_list_labels[d_idx][
                        class_idx].shape[0]
                    if base_class_size < curr_class_size:
                        base_class_size = curr_class_size
                        base_class_idx = d_idx
                self.base_domain_size += base_class_size
                print("Max Class Size: ", base_class_size, base_class_idx, y_c)

        # Stack all domains into flat tensors.
        train_imgs = torch.cat(training_list_img)
        train_labels = torch.cat(training_list_labels)
        train_indices = np.array(training_list_idx)
        train_indices = np.hstack(train_indices)
        self.training_list_size = training_list_size

        print(train_imgs.shape, train_labels.shape, train_indices.shape)
        print(self.training_list_size)

        # Create domain labels: samples from domain k get the label k.
        train_domains = torch.zeros(train_labels.size())
        domain_start = 0
        for idx in range(len(self.list_train_domains)):
            curr_domain_size = self.training_list_size[idx]
            train_domains[domain_start:domain_start + curr_domain_size] += idx
            domain_start += curr_domain_size

        # Shuffle everything one more time (images, labels, domains and
        # indices are permuted together).
        inds = np.arange(train_labels.size()[0])
        np.random.shuffle(inds)
        train_imgs = train_imgs[inds]
        train_labels = train_labels[inds]
        train_domains = train_domains[inds].long()
        train_indices = train_indices[inds]

        # One-hot encode class labels.  NOTE(review): assumes all domains
        # share the same class count (uses training_out_classes[0]).
        out_classes = training_out_classes[0]
        y = torch.eye(out_classes)
        train_labels = y[train_labels]

        # One-hot encode domain ids.
        d = torch.eye(len(self.list_train_domains))
        train_domains = d[train_domains]

        print(
            train_imgs.shape,
            train_labels.shape,
            train_domains.shape,
            train_indices.shape,
        )
        # If shape (B,H,W) change it to (B,C,H,W) with C=1
        if len(train_imgs.shape) == 3:
            train_imgs = train_imgs.unsqueeze(1)
        return train_imgs, train_labels, train_domains, train_indices
def main():
    """Train, validate and test an 18-class defect classifier (NASNet-A Large)."""
    # Fix all random seeds for reproducibility
    np.random.seed(666)
    torch.manual_seed(666)
    torch.cuda.manual_seed_all(666)
    random.seed(666)

    # Derive a run name from this script's filename; used to name the
    # model-checkpoint and result directories
    file_name = os.path.basename(__file__).split('.')[0]
    # Create the folders that hold checkpoints and results
    if not os.path.exists('../data/model/%s' % file_name):
        os.makedirs('../data/model/%s' % file_name)
    if not os.path.exists('../data/result/%s' % file_name):
        os.makedirs('../data/result/%s' % file_name)
    # Create the log file if it does not exist yet
    if not os.path.exists('../data/result/%s.txt' % file_name):
        with open('../data/result/%s.txt' % file_name, 'w') as acc_file:
            pass
    # Stamp this run's start time into the log
    with open('../data/result/%s.txt' % file_name, 'a') as acc_file:
        acc_file.write('\n%s %s\n' % (time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(time.time())), file_name))

    # Default image loader: read with PIL and force 3-channel RGB
    def default_loader(path):
        # return Image.open(path)
        return Image.open(path).convert('RGB')

    # Dataset for the training split: yields (image, label) pairs built
    # from a DataFrame with 'img_path' and 'label' columns.
    class TrainDataset(Dataset):
        def __init__(self,
                     label_list,
                     transform=None,
                     target_transform=None,
                     loader=default_loader):
            # Materialize the (path, label) pairs once up front.
            self.imgs = [(row['img_path'], row['label'])
                         for _, row in label_list.iterrows()]
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            path, label = self.imgs[index]
            sample = self.loader(path)
            if self.transform is not None:
                sample = self.transform(sample)
            return sample, label

        def __len__(self):
            return len(self.imgs)

    # Dataset for the validation split: identical contract to TrainDataset.
    class ValDataset(Dataset):
        def __init__(self,
                     label_list,
                     transform=None,
                     target_transform=None,
                     loader=default_loader):
            # Materialize the (path, label) pairs once up front.
            self.imgs = [(row['img_path'], row['label'])
                         for _, row in label_list.iterrows()]
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            path, label = self.imgs[index]
            sample = self.loader(path)
            if self.transform is not None:
                sample = self.transform(sample)
            return sample, label

        def __len__(self):
            return len(self.imgs)

    # Dataset for the test split: only image paths, no labels.
    class TestDataset(Dataset):
        def __init__(self,
                     label_list,
                     transform=None,
                     target_transform=None,
                     loader=default_loader):
            # Only the path column is kept; there are no labels at test time.
            self.imgs = [row['img_path'] for _, row in label_list.iterrows()]
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            path = self.imgs[index]
            sample = self.loader(path)
            if self.transform is not None:
                sample = self.transform(sample)
            # Return the path too so predictions can be matched to files.
            return sample, path

        def __len__(self):
            return len(self.imgs)

    # Augmentation: rotate by an angle drawn uniformly from a fixed set
    class FixedRotation(object):
        def __init__(self, angles):
            # Candidate rotation angles (degrees), e.g. [0, 90, 180, 270]
            self.angles = angles

        def __call__(self, img):
            return fixed_rotate(img, self.angles)

    def fixed_rotate(img, angles):
        """Rotate *img* by one angle chosen uniformly at random from *angles*."""
        choices = list(angles)
        pick = random.randint(0, len(choices) - 1)
        return img.rotate(choices[pick])

    # Training loop for one epoch
    def train(train_loader, model, criterion, optimizer, epoch):
        """Train `model` for one epoch over `train_loader`.

        Logs data/compute time, running loss and top-1 accuracy every
        `print_freq` steps. Updates the model in place via `optimizer`.
        """
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # switch to train mode
        model.train()

        end = time.time()
        # Iterate over the training batches
        for i, (images, target) in enumerate(train_loader):
            # time spent loading this batch
            data_time.update(time.time() - end)
            # Move batch to GPU. `async` became a reserved keyword in
            # Python 3.7 (SyntaxError); `non_blocking` is the PyTorch >= 0.4
            # replacement with identical semantics.
            image_var = torch.tensor(images).cuda(non_blocking=True)
            label = torch.tensor(target).cuda(non_blocking=True)

            # forward pass
            y_pred = model(image_var)
            # compute loss
            loss = criterion(y_pred, label)
            losses.update(loss.item(), images.size(0))

            # top-1 accuracy
            prec, PRED_COUNT = accuracy(y_pred.data, target, topk=(1, 1))
            acc.update(prec, PRED_COUNT)

            # backprop and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # time spent on this step
            batch_time.update(time.time() - end)
            end = time.time()

            # periodic progress report
            if i % print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuray {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch,
                          i,
                          len(train_loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          acc=acc))

    # Validation pass over the whole validation set
    def validate(val_loader, model, criterion):
        """Evaluate `model` on `val_loader`.

        Returns:
            (avg_accuracy, avg_loss) over the whole validation set.
        """
        batch_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # switch to evaluate mode
        model.eval()

        end = time.time()
        for i, (images, labels) in enumerate(val_loader):
            # `async` became a reserved keyword in Python 3.7 (SyntaxError);
            # `non_blocking` is the PyTorch >= 0.4 replacement.
            image_var = torch.tensor(images).cuda(non_blocking=True)
            target = torch.tensor(labels).cuda(non_blocking=True)

            # No gradients needed during evaluation.
            with torch.no_grad():
                y_pred = model(image_var)
                loss = criterion(y_pred, target)

            # measure accuracy and record loss
            prec, PRED_COUNT = accuracy(y_pred.data, labels, topk=(1, 1))
            losses.update(loss.item(), images.size(0))
            acc.update(prec, PRED_COUNT)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('TrainVal: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuray {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          acc=acc))

        print(' * Accuray {acc.avg:.3f}'.format(acc=acc),
              '(Previous Best Acc: %.3f)' % best_precision,
              ' * Loss {loss.avg:.3f}'.format(loss=losses),
              'Previous Lowest Loss: %.3f)' % lowest_loss)
        return acc.avg, losses.avg

    # Inference on the test set; writes a submission CSV under ../submit/
    def test(test_loader, model):
        csv_map = OrderedDict({'filename': [], 'probability': []})
        # switch to evaluate mode
        model.eval()
        for i, (images, filepath) in enumerate(tqdm(test_loader)):
            # bs, ncrops, c, h, w = images.size()
            filepath = [os.path.basename(i) for i in filepath]
            # NOTE(review): image_var is never moved to the GPU here, unlike
            # train()/validate(); presumably DataParallel handles the
            # scatter — confirm before relying on it.
            image_var = torch.tensor(images,
                                     requires_grad=False)  # for pytorch 0.4

            with torch.no_grad():
                y_pred = model(image_var)
                # softmax turns the logits into per-class probabilities
                smax = nn.Softmax(1)
                smax_out = smax(y_pred)

            # record filenames and ';'-joined probability strings
            csv_map['filename'].extend(filepath)
            for output in smax_out:
                prob = ';'.join([str(i) for i in output.data.tolist()])
                csv_map['probability'].append(prob)

        result = pd.DataFrame(csv_map)
        result['probability'] = result['probability'].map(
            lambda x: [float(i) for i in x.split(';')])

        # Convert to the submission format: class 0 is 'norm'; classes 12-17
        # are folded back onto existing defect ids before being named.
        sub_filename, sub_label = [], []
        for index, row in result.iterrows():
            sub_filename.append(row['filename'])
            pred_label = np.argmax(row['probability'])
            if pred_label == 0:
                sub_label.append('norm')
            else:
                if pred_label == 12:
                    pred_label = 2
                if pred_label == 13:
                    pred_label = 11
                if pred_label == 14:
                    pred_label = 8
                if pred_label == 15:
                    pred_label = 10
                if pred_label == 16:
                    pred_label = 2
                if pred_label == 17:
                    pred_label = 10
                sub_label.append('defect%d' % pred_label)

        # Write a timestamped result file, ready for direct submission.
        submission = pd.DataFrame({
            'filename': sub_filename,
            'label': sub_label
        })
        submission.to_csv(
            ("../submit/submit_" +
             datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"),
            header=None,
            index=False)
        return

    # Save the latest checkpoint; copy it as best / lowest-loss when flagged
    def save_checkpoint(state,
                        is_best,
                        is_lowest_loss,
                        filename='../data/model/%s/checkpoint.pth.tar' %
                        file_name):
        # Always persist the latest state.
        torch.save(state, filename)
        # Keep a separate copy of the best-accuracy model.
        if is_best:
            shutil.copyfile(filename,
                            '../data/model/%s/model_best.pth.tar' % file_name)
        # And one for the lowest-loss model.
        if is_lowest_loss:
            shutil.copyfile(filename,
                            '../data/model/%s/lowest_loss.pth.tar' % file_name)

    # Running-average tracker (used for loss / accuracy / timing)
    class AverageMeter(object):
        """Computes and stores the average and current value."""

        def __init__(self):
            self.reset()

        def reset(self):
            """Zero out all statistics."""
            self.val = 0
            self.count = 0
            self.sum = 0
            self.avg = 0

        def update(self, val, n=1):
            """Record `val` observed `n` times and refresh the running mean."""
            self.val = val
            self.count += n
            self.sum += val * n
            self.avg = self.sum / self.count

    # Learning-rate decay: lr = lr / lr_decay; returns a fresh Adam optimizer
    def adjust_learning_rate():
        # Mutate the enclosing scope's lr so later stages keep decaying
        # from the current value.
        nonlocal lr
        lr = lr / lr_decay
        # Rebuild the optimizer with the decayed rate (state is discarded).
        return optim.Adam(model.parameters(),
                          lr,
                          weight_decay=weight_decay,
                          amsgrad=True)

    # top-K accuracy (callers pass topk=(1, 1), i.e. top-1)
    def accuracy(y_pred, y_actual, topk=(1, )):
        """Compute precision@k for the largest k in `topk`.

        Args:
            y_pred: (N, C) tensor of class scores.
            y_actual: (N,) tensor of ground-truth labels.
            topk: iterable of k values; only max(topk) is used.

        Returns:
            (accuracy_percent, sample_count): percentage of samples whose
            true label appears among the top-k predictions, and N.
        """
        maxk = max(topk)
        PRED_COUNT = y_actual.size(0)
        PRED_CORRECT_COUNT = 0
        prob, pred = y_pred.topk(maxk, 1, True, True)
        for j in range(pred.size(0)):
            # The original `int(pred[j])` raised for maxk > 1 (multi-element
            # tensor); a membership test handles any k and is identical for
            # k == 1, the only case existing callers use.
            if int(y_actual[j]) in pred[j].tolist():
                PRED_CORRECT_COUNT += 1
        if PRED_COUNT == 0:
            final_acc = 0
        else:
            final_acc = PRED_CORRECT_COUNT / PRED_COUNT
        return final_acc * 100, PRED_COUNT

    # ---- main body: configuration ----

    # Batch size. On a small dataset keep it modest; reduce it if you hit
    # CUDA out-of-memory.
    batch_size = 24
    # DataLoader worker processes; keep below CPU core count and ideally a
    # divisor of batch_size. Use workers=0 on Windows if this errors.
    workers = 12

    # Epoch schedule: training runs in stages; after each stage the learning
    # rate is decayed and the next stage begins
    stage_epochs = [20, 10, 10]
    # initial learning rate
    lr = 5e-5
    # learning-rate decay factor (new_lr = lr / lr_decay)
    lr_decay = 5
    # weight decay (L2 regularization)
    weight_decay = 1e-3

    # state initialization
    stage = 0
    start_epoch = 0
    total_epochs = sum(stage_epochs)
    best_precision = 0
    lowest_loss = 100

    # Print frequency in steps. In the printed output, the value before the
    # parentheses is instantaneous; the value inside is the epoch average.
    print_freq = 1
    # validation split ratio
    val_ratio = 0.12
    # evaluate-only mode (skip training)
    evaluate = False
    # resume from a checkpoint
    resume = False
    # Build the backbone: NASNet-A Large pretrained on ImageNet, with the
    # classifier head replaced for 18 classes
    model = pretrainedmodels.__dict__['nasnetalarge'](num_classes=1000,
                                                      pretrained='imagenet')
    model.last_linear = nn.Linear(4032, 18)
    model = torch.nn.DataParallel(model).cuda()
    # optionally resume from a checkpoint
    if resume:
        checkpoint_path = '../data/model/%s/checkpoint.pth.tar' % file_name
        if os.path.isfile(checkpoint_path):
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path)
            start_epoch = checkpoint['epoch'] + 1
            best_precision = checkpoint['best_precision']
            lowest_loss = checkpoint['lowest_loss']
            stage = checkpoint['stage']
            lr = checkpoint['lr']
            model.load_state_dict(checkpoint['state_dict'])
            # If the checkpoint landed exactly on a stage boundary, advance
            # the stage, decay the LR, and reload the best weights so the new
            # stage starts from the best model (mirrors the training loop).
            if start_epoch in np.cumsum(stage_epochs)[:-1]:
                stage += 1
                optimizer = adjust_learning_rate()
                model.load_state_dict(
                    torch.load('../data/model/%s/model_best.pth.tar' %
                               file_name)['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            # BUG FIX: the message previously formatted the boolean `resume`
            # instead of the path that was actually missing.
            print("=> no checkpoint found at '{}'".format(checkpoint_path))

    # Load the labeled image list
    all_data = pd.read_csv('../data/label.csv')
    # Split train/validation; `stratify` keeps the label distribution equal
    # in both splits
    train_data_list, val_data_list = train_test_split(
        all_data,
        test_size=val_ratio,
        random_state=666,
        stratify=all_data['label'])
    # Load the test image list
    test_data_list = pd.read_csv('../data/test.csv')

    # Normalize with ImageNet statistics, since the backbone was pretrained
    # on ImageNet
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Training-set transforms: resize to 350, augment, random-crop to 331
    # (the NASNet-A Large input size)
    train_data = TrainDataset(
        train_data_list,
        transform=transforms.Compose([
            transforms.Resize((350, 350)),
            transforms.ColorJitter(0.15, 0.15, 0.15, 0.075),
            transforms.RandomHorizontalFlip(),
            transforms.RandomGrayscale(),
            # transforms.RandomRotation(20),
            FixedRotation([0, 90, 180, 270]),
            transforms.RandomCrop((331, 331)),
            transforms.ToTensor(),
            normalize,
        ]))

    # Validation-set transforms (deterministic center crop)
    val_data = ValDataset(val_data_list,
                          transform=transforms.Compose([
                              transforms.Resize((350, 350)),
                              transforms.CenterCrop((331, 331)),
                              transforms.ToTensor(),
                              normalize,
                          ]))

    # Test-set transforms (same as validation)
    test_data = TestDataset(test_data_list,
                            transform=transforms.Compose([
                                transforms.Resize((350, 350)),
                                transforms.CenterCrop((331, 331)),
                                transforms.ToTensor(),
                                normalize,
                            ]))

    # Data loaders (no gradient memory needed at eval time, so val/test use
    # double the batch size)
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=workers)
    val_loader = DataLoader(val_data,
                            batch_size=batch_size * 2,
                            shuffle=False,
                            pin_memory=False,
                            num_workers=workers)
    test_loader = DataLoader(test_data,
                             batch_size=batch_size * 2,
                             shuffle=False,
                             pin_memory=False,
                             num_workers=workers)

    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss().cuda()

    # Adam optimizer with amsgrad
    optimizer = optim.Adam(model.parameters(),
                           lr,
                           weight_decay=weight_decay,
                           amsgrad=True)

    if evaluate:
        validate(val_loader, model, criterion)
    else:
        # Main training loop
        for epoch in range(start_epoch, total_epochs):
            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch)
            # evaluate on validation set
            precision, avg_loss = validate(val_loader, model, criterion)

            # Append this epoch's precision and loss to the log file
            with open('../data/result/%s.txt' % file_name, 'a') as acc_file:
                acc_file.write('Epoch: %2d, Precision: %.8f, Loss: %.8f\n' %
                               (epoch, precision, avg_loss))

            # Track best precision / lowest loss; save latest and best models
            is_best = precision > best_precision
            is_lowest_loss = avg_loss < lowest_loss
            best_precision = max(precision, best_precision)
            lowest_loss = min(avg_loss, lowest_loss)
            state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_precision': best_precision,
                'lowest_loss': lowest_loss,
                'stage': stage,
                'lr': lr,
            }
            save_checkpoint(state, is_best, is_lowest_loss)

            # At a stage boundary: decay LR, reload the best weights, and
            # continue with the next stage
            if (epoch + 1) in np.cumsum(stage_epochs)[:-1]:
                stage += 1
                optimizer = adjust_learning_rate()
                model.load_state_dict(
                    torch.load('../data/model/%s/model_best.pth.tar' %
                               file_name)['state_dict'])
                print('Step into next stage')
                with open('../data/result/%s.txt' % file_name,
                          'a') as acc_file:
                    acc_file.write(
                        '---------------Step into next stage----------------\n'
                    )

    # Record the best offline score in both the run log and the global log
    with open('../data/result/%s.txt' % file_name, 'a') as acc_file:
        acc_file.write('* best acc: %.8f  %s\n' %
                       (best_precision, os.path.basename(__file__)))
    with open('../data/result/best_acc.txt', 'a') as acc_file:
        acc_file.write(
            '%s  * best acc: %.8f  %s\n' %
            (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(
                time.time())), best_precision, os.path.basename(__file__)))

    # Load the best model, predict on the test set, and write a submission
    # file ready to upload
    best_model = torch.load('../data/model/%s/model_best.pth.tar' % file_name)
    model.load_state_dict(best_model['state_dict'])
    test(test_loader=test_loader, model=model)

    # Free cached GPU memory
    torch.cuda.empty_cache()
예제 #26
0
def get_train_transformations(p):
    """Build the training-time transform pipeline described by config `p`.

    Args:
        p: config dict with 'augmentation_strategy' in
           {'standard', 'simclr', 'ours'} plus the matching
           'augmentation_kwargs' (and 'train_db_name' for 'simclr').

    Returns:
        A torchvision transforms.Compose pipeline.

    Raises:
        ValueError: for an unknown augmentation strategy.
    """
    if p['augmentation_strategy'] == 'standard':
        # Standard augmentation strategy
        return transforms.Compose([
            transforms.RandomResizedCrop(
                **p['augmentation_kwargs']['random_resized_crop']),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(**p['augmentation_kwargs']['normalize'])
        ])

    elif p['augmentation_strategy'] == 'simclr':
        # Augmentation strategy from the SimCLR paper. The MNIST and
        # non-MNIST branches were identical except that MNIST is upscaled
        # to 48px first, so the pipeline is built once and Resize(48) is
        # prepended for MNIST.
        pipeline = [
            transforms.RandomResizedCrop(
                **p['augmentation_kwargs']['random_resized_crop']),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([
                transforms.ColorJitter(
                    **p['augmentation_kwargs']['color_jitter'])
            ],
                                   p=p['augmentation_kwargs']
                                   ['color_jitter_random_apply']['p']),
            transforms.RandomGrayscale(
                **p['augmentation_kwargs']['random_grayscale']),
            transforms.ToTensor(),
            transforms.Normalize(**p['augmentation_kwargs']['normalize'])
        ]
        if p['train_db_name'] == 'mnist':
            pipeline.insert(0, transforms.Resize(48))
        return transforms.Compose(pipeline)

    elif p['augmentation_strategy'] == 'ours':
        # Augmentation strategy from our paper
        return transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(p['augmentation_kwargs']['crop_size'],
                                  pad_if_needed=True),
            Augment(p['augmentation_kwargs']['num_strong_augs']),
            transforms.ToTensor(),
            transforms.Normalize(**p['augmentation_kwargs']['normalize']),
            Cutout(
                n_holes=p['augmentation_kwargs']['cutout_kwargs']['n_holes'],
                length=p['augmentation_kwargs']['cutout_kwargs']['length'],
                random=p['augmentation_kwargs']['cutout_kwargs']['random'])
        ])

    else:
        raise ValueError('Invalid augmentation strategy {}'.format(
            p['augmentation_strategy']))
예제 #27
0
    'test':
    transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(100),
        #transforms.Resize(args.resize_size),
        transforms.ToTensor(),
        transforms.Normalize(dataset_mean, dataset_std)
    ])
}
## transforms for handshape annotated data
handshape_transforms = {
    'train':
    transforms.Compose([
        transforms.RandomResizedCrop(args.random_crop_size),
        #transforms.Resize(args.resize_size),
        transforms.RandomGrayscale(0.2),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(dataset_mean, dataset_std)
    ]),
    'val':
    transforms.Compose([
        transforms.CenterCrop(args.random_crop_size),
        #transforms.Resize(args.resize_size),
        transforms.ToTensor(),
        transforms.Normalize(dataset_mean, dataset_std)
    ]),
    'test':
    transforms.Compose([
        transforms.CenterCrop(args.random_crop_size),
        #transforms.Resize(args.resize_size),
예제 #28
0
파일: config.py 프로젝트: zdaxie/SelfSup
 DATALOADER=dict(NUM_WORKERS=6, ),
 TRAINER=dict(
     FP16=dict(ENABLED=False),
     NAME="SWAVRunner",
 ),
 INPUT=dict(
     AUG=dict(
         TRAIN_PIPELINES=dict(
             contrastive=[
                 ("RepeatList", dict(transforms=[
                     ("Torch_Compose", transforms.Compose([
                         transforms.RandomResizedCrop(224, scale=(0.14, 1.)),
                         transforms.RandomHorizontalFlip(p=0.5),
                         transforms.RandomApply([
                             transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8),
                         transforms.RandomGrayscale(p=0.2),
                     ])),
                     ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.5, mode="PIL")),
                     ("Torch_Compose", transforms.Compose([
                         transforms.ToTensor(),
                         transforms.Normalize(
                             mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
                     ])),
                 ], repeat_times=2)),
             ],
             multiview=[
                 ("RepeatList", dict(transforms=[
                     ("Torch_Compose", transforms.Compose([
                         transforms.RandomResizedCrop(96, scale=(0.05, 0.14)),
                         transforms.RandomHorizontalFlip(p=0.5),
예제 #29
0
    def test_random_grayscale(self):
        """Unit tests for random grayscale transform.

        Covers the probabilistic behaviour at p=0.5 (binomial test against
        the expected grayscale rate) and the deterministic p=0.0 / p=1.0
        cases for both RGB and single-channel inputs.
        """

        # Test Set 1: RGB -> 3 channel grayscale
        # Save/restore the global RNG state so the fixed seed doesn't leak
        # into other tests.
        random_state = random.getstate()
        random.seed(42)
        x_shape = [2, 2, 3]
        x_np = np.random.randint(0, 256, x_shape, np.uint8)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')
        gray_np = np.array(x_pil_2)

        num_samples = 250
        num_gray = 0
        for _ in range(num_samples):
            gray_pil_2 = transforms.RandomGrayscale(p=0.5)(x_pil)
            gray_np_2 = np.array(gray_pil_2)
            # Count as grayscaled only when all three channels are equal AND
            # match PIL's own L-mode conversion.
            if np.array_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) and \
               np.array_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) and \
               np.array_equal(gray_np, gray_np_2[:, :, 0]):
                num_gray = num_gray + 1

        # NOTE(review): scipy.stats.binom_test was removed in SciPy 1.12;
        # stats.binomtest(...).pvalue is the modern replacement.
        p_value = stats.binom_test(num_gray, num_samples, p=0.5)
        random.setstate(random_state)
        assert p_value > 0.0001

        # Test Set 2: grayscale -> 1 channel grayscale
        random_state = random.getstate()
        random.seed(42)
        x_shape = [2, 2, 3]
        x_np = np.random.randint(0, 256, x_shape, np.uint8)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')
        gray_np = np.array(x_pil_2)

        num_samples = 250
        num_gray = 0
        for _ in range(num_samples):
            gray_pil_3 = transforms.RandomGrayscale(p=0.5)(x_pil_2)
            gray_np_3 = np.array(gray_pil_3)
            if np.array_equal(gray_np, gray_np_3):
                num_gray = num_gray + 1

        p_value = stats.binom_test(
            num_gray, num_samples,
            p=1.0)  # Note: grayscale is always unchanged
        random.setstate(random_state)
        assert p_value > 0.0001

        # Test set 3: Explicit tests
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')
        gray_np = np.array(x_pil_2)

        # Case 3a: RGB -> 3 channel grayscale (grayscaled)
        trans2 = transforms.RandomGrayscale(p=1.0)
        gray_pil_2 = trans2(x_pil)
        gray_np_2 = np.array(gray_pil_2)
        assert gray_pil_2.mode == 'RGB', 'mode should be RGB'
        assert gray_np_2.shape == tuple(x_shape), 'should be 3 channel'
        np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1])
        np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2])
        np.testing.assert_equal(gray_np, gray_np_2[:, :, 0])

        # Case 3b: RGB -> 3 channel grayscale (unchanged)
        trans2 = transforms.RandomGrayscale(p=0.0)
        gray_pil_2 = trans2(x_pil)
        gray_np_2 = np.array(gray_pil_2)
        assert gray_pil_2.mode == 'RGB', 'mode should be RGB'
        assert gray_np_2.shape == tuple(x_shape), 'should be 3 channel'
        np.testing.assert_equal(x_np, gray_np_2)

        # Case 3c: 1 channel grayscale -> 1 channel grayscale (grayscaled)
        trans3 = transforms.RandomGrayscale(p=1.0)
        gray_pil_3 = trans3(x_pil_2)
        gray_np_3 = np.array(gray_pil_3)
        assert gray_pil_3.mode == 'L', 'mode should be L'
        assert gray_np_3.shape == tuple(x_shape[0:2]), 'should be 1 channel'
        np.testing.assert_equal(gray_np, gray_np_3)

        # Case 3d: 1 channel grayscale -> 1 channel grayscale (unchanged)
        trans3 = transforms.RandomGrayscale(p=0.0)
        gray_pil_3 = trans3(x_pil_2)
        gray_np_3 = np.array(gray_pil_3)
        assert gray_pil_3.mode == 'L', 'mode should be L'
        assert gray_np_3.shape == tuple(x_shape[0:2]), 'should be 1 channel'
        np.testing.assert_equal(gray_np, gray_np_3)

        # Checking if RandomGrayscale can be printed as string
        trans3.__repr__()
예제 #30
0
def main():
    """Train (or evaluate) the classification model configured in ``cfg.CLS``.

    Builds train/val data loaders with the configured augmentations,
    constructs the model (optionally loading pre-trained weights), then
    either runs a single evaluation pass (``cfg.CLS.evaluate``) or the full
    train/validate loop with per-epoch logging and checkpointing.

    Side effects: creates ``cfg.CLS.ckpt``, copies the config file there,
    writes ``log.txt`` / ``log.eps``, and updates the module-level globals
    ``BEST_ACC`` and ``LR_STATE`` (the latter is mutated by the training
    helpers — presumably via the LR schedule; confirm in ``mixup_train``).
    """
    global BEST_ACC, LR_STATE
    start_epoch = cfg.CLS.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Create ckpt folder and snapshot the config used for this run so the
    # experiment is reproducible from its checkpoint directory alone.
    if not os.path.isdir(cfg.CLS.ckpt):
        mkdir_p(cfg.CLS.ckpt)
    if args.cfg_file is not None and not cfg.CLS.evaluate:
        # os.path.basename is portable, unlike splitting on '/'.
        shutil.copyfile(
            args.cfg_file,
            os.path.join(cfg.CLS.ckpt, os.path.basename(args.cfg_file)))

    # Dataset and Loader
    normalize = transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)
    train_aug = [
        transforms.RandomResizedCrop(cfg.CLS.crop_size),
        transforms.RandomHorizontalFlip(),
    ]
    # Optional augmentations, each enabled via config.
    if len(cfg.CLS.rotation) > 0:
        train_aug.append(transforms.RandomRotation(cfg.CLS.rotation))
    if len(cfg.CLS.pixel_jitter) > 0:
        train_aug.append(RandomPixelJitter(cfg.CLS.pixel_jitter))
    if cfg.CLS.grayscale > 0:
        train_aug.append(transforms.RandomGrayscale(cfg.CLS.grayscale))
    train_aug.append(transforms.ToTensor())
    train_aug.append(normalize)

    traindir = os.path.join(cfg.CLS.data_root, cfg.CLS.train_folder)
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose(train_aug)),
        batch_size=cfg.CLS.train_batch,
        shuffle=True,
        num_workers=cfg.workers,
        pin_memory=True)

    if cfg.CLS.validate or cfg.CLS.evaluate:
        valdir = os.path.join(cfg.CLS.data_root, cfg.CLS.val_folder)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(
                valdir,
                transforms.Compose([
                    transforms.Resize(cfg.CLS.base_size),
                    transforms.CenterCrop(cfg.CLS.crop_size),
                    transforms.ToTensor(),
                    normalize,
                ])),
            batch_size=cfg.CLS.test_batch,
            shuffle=False,
            num_workers=cfg.workers,
            pin_memory=True)

    # Build a throwaway instance only to measure FLOPs/params, then rebuild a
    # clean one for training (measure_model may hook/mutate the model).
    model = models.__dict__[cfg.CLS.arch]()
    print(model)
    n_flops, n_convops, n_params = measure_model(model, cfg.CLS.crop_size,
                                                 cfg.CLS.crop_size)
    print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.format(
        n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
    del model
    model = models.__dict__[cfg.CLS.arch]()

    # Load pre-trained weights, if configured.
    if cfg.CLS.pretrained:
        print("==> Using pre-trained model '{}'".format(cfg.CLS.pretrained))
        pretrained_dict = torch.load(cfg.CLS.pretrained)
        # A checkpoint may wrap the weights under a 'state_dict' key; a bare
        # state_dict is used as-is.  (Replaces a bare `except:` that silently
        # swallowed every exception, including KeyboardInterrupt.)
        if isinstance(pretrained_dict, dict) and 'state_dict' in pretrained_dict:
            pretrained_dict = pretrained_dict['state_dict']
        model_dict = model.state_dict()
        updated_dict, match_layers, mismatch_layers = weight_filler(
            pretrained_dict, model_dict)
        model_dict.update(updated_dict)
        model.load_state_dict(model_dict)
    else:
        print("==> Creating model '{}'".format(cfg.CLS.arch))

    # Define loss function (criterion) and optimizer.
    criterion = nn.CrossEntropyLoss().cuda()
    if cfg.CLS.pretrained:
        # Layers that did not match the pre-trained weights get a 10x
        # learning rate; matched layers use the base rate.
        new_params = [
            param for name, param in model.named_parameters()
            if name in mismatch_layers
        ]
        base_params = [
            param for name, param in model.named_parameters()
            if name in match_layers
        ]
        model_params = [
            {'params': base_params},
            {'params': new_params, 'lr': cfg.CLS.base_lr * 10},
        ]
    else:
        model_params = model.parameters()
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    optimizer = optim.SGD(model_params,
                          lr=cfg.CLS.base_lr,
                          momentum=cfg.CLS.momentum,
                          weight_decay=cfg.CLS.weight_decay)

    # Evaluation-only mode: one test pass, then return.
    if cfg.CLS.evaluate:
        print('\n==> Evaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion,
                                               start_epoch, USE_CUDA)
        print(
            '==> Test Loss: {:.8f} | Test_top1: {:.4f}% | Test_top5: {:.4f}%'.
            format(test_loss, test_top1, test_top5))
        return

    # Resume training from a checkpoint, or start a fresh log.
    title = 'Pytorch-CLS-' + cfg.CLS.arch
    if cfg.CLS.resume:
        print("==> Resuming from checkpoint '{}'".format(cfg.CLS.resume))
        if not os.path.isfile(cfg.CLS.resume):
            # An `assert` here would be stripped under `python -O`; raise
            # explicitly so a missing checkpoint always fails loudly.
            raise FileNotFoundError('Error: no checkpoint directory found!')
        checkpoint = torch.load(cfg.CLS.resume)
        BEST_ACC = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    # Train and (optionally) validate, epoch by epoch.
    for epoch in range(start_epoch, cfg.CLS.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f}'.format(epoch + 1, cfg.CLS.epochs,
                                                     LR_STATE))

        train_loss, train_acc = mixup_train(train_loader, model, criterion,
                                            optimizer, epoch, USE_CUDA)
        if cfg.CLS.validate:
            test_loss, test_top1, test_top5 = test(val_loader, model,
                                                   criterion, epoch, USE_CUDA)
        else:
            test_loss, test_top1, test_top5 = 0.0, 0.0, 0.0

        # Append logger file
        logger.append([LR_STATE, train_loss, test_loss, train_acc, test_top1])
        # Save model
        save_checkpoint(model, optimizer, test_top1, epoch)
        # Best-effort curve drawing: a plotting failure must not abort
        # training, but keep the catch narrow enough to let Ctrl-C through.
        try:
            draw_curve(cfg.CLS.arch, cfg.CLS.ckpt)
            print('==> Success saving log curve...')
        except Exception:
            print('==> Saving log curve error...')

    logger.close()
    # Best-effort archival of the final figure and a timestamped log copy.
    try:
        savefig(os.path.join(cfg.CLS.ckpt, 'log.eps'))
        shutil.copyfile(
            os.path.join(cfg.CLS.ckpt, 'log.txt'),
            os.path.join(
                cfg.CLS.ckpt, 'log{}.txt'.format(
                    datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:
        print('copy log error.')
    print('==> Training Done!')
    print('==> Best acc: {:.4f}%'.format(BEST_ACC))