def classify_single_image(image_path):
    """Classify one image file and print its probabilities and top-1 class.

    Parameters
    ----------
    image_path : str
        Path to the image to classify.
    """
    # ResNet-50 layout: Bottleneck blocks [3, 4, 6, 3].
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          NUM_CLASSES)
    model.eval()
    if USE_GPU:
        cudnn.enabled = True
        # dim=1 normalises over the class dimension; Softmax() without an
        # explicit dim is deprecated in PyTorch.
        softmax = nn.Softmax(dim=1).cuda()
        model.cuda()
        saved_state_dict = torch.load(MODEL_PATH)
    else:
        softmax = nn.Softmax(dim=1)
        saved_state_dict = torch.load(MODEL_PATH, map_location='cpu')
    # Copy matching layers from the checkpoint into the model (project helper).
    load_filtered_state_dict(model,
                             saved_state_dict,
                             ignore_layer=[],
                             reverse=False,
                             gpu=cudnn.enabled)

    transformations = transforms.Compose(
        [transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
         transforms.ToTensor()])
    imgs = torch.FloatTensor(1, 3, IMAGE_SIZE, IMAGE_SIZE)
    if USE_GPU:
        imgs = imgs.cuda()

    # image loading; convert("RGB") makes grayscale/RGBA inputs work too
    imgs[0] = transformations(Image.open(image_path).convert("RGB"))

    # Inference only: no_grad skips autograd bookkeeping (and makes the
    # original .detach() unnecessary).
    with torch.no_grad():
        pred = softmax(model(imgs))
    print(pred.cpu().numpy())
    _, pred_1 = pred.topk(1, 1, True, True)
    c = default_class[pred_1.cpu().numpy()[0][0]]
    print("{} -- {}".format(image_path, c))
def main(args):
    """Evaluate a saved ResNet-50 classifier on a test directory and plot
    the per-class results.

    Parameters
    ----------
    args : argparse.Namespace
        Needs num_classes, saved_model, image_size, gpu, test_data_dir,
        batch_size.
    """
    # ResNet-50 layout: Bottleneck blocks [3, 4, 6, 3].
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          args.num_classes)
    saved_state_dict = torch.load(args.saved_model)

    transformations = transforms.Compose([
        transforms.Resize((args.image_size, args.image_size)),
        transforms.ToTensor()
    ])

    if args.gpu[0] >= 0:
        cudnn.enabled = True
        # dim=1: normalise over the class dimension (dim-less Softmax is
        # deprecated).
        softmax = nn.Softmax(dim=1).cuda()
        model.cuda()
    else:
        softmax = nn.Softmax(dim=1)

    load_filtered_state_dict(model,
                             saved_state_dict,
                             ignore_layer=[],
                             reverse=True)
    # BUG FIX: put BatchNorm/Dropout in inference mode; the original
    # evaluated with the model still in training mode.
    model.eval()

    test_x, test_y, classes_names = get_dataset(args.test_data_dir)
    test_dataset = DataWrapper(test_x,
                               test_y,
                               transformations,
                               augumentation=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=1)

    n = len(test_dataset)

    y_pred = np.zeros(n)
    y = np.zeros(n)
    count = 0
    with torch.no_grad():
        for images, labels, names in test_loader:
            if args.gpu[0] >= 0:
                images = images.cuda()
                labels = labels.cuda()

            label_pred = softmax(model(images))

            # BUG FIX: the original reused `n` for the batch size, silently
            # shadowing the dataset length used to allocate y/y_pred.
            batch_n = images.size(0)
            _, label_pred = label_pred.topk(1, 1, True, True)
            y_pred[count:count + batch_n] = label_pred.view(-1).cpu().numpy()
            y[count:count + batch_n] = labels.data.cpu().numpy()
            count += batch_n

    plot(y, y_pred, classes_names)
Exemple #3
0
 def __init__(self, cfg):
     """Assemble the PAA detector: backbone (ResNet trunk + FPN neck),
     detection head, loss and anchor generator.

     Args:
         cfg: project configuration object handed to every sub-module.
     """
     super().__init__()
     trunk = resnet.ResNet(cfg)
     # NOTE(review): the leading 0 presumably disables the first stage's
     # lateral connection -- confirm against fpn_module.FPN.
     neck = fpn_module.FPN(in_channels_list=[0, 512, 1024, 2048],
                           out_channels=256)
     stages = OrderedDict()
     stages["body"] = trunk
     stages["fpn"] = neck
     self.backbone = nn.Sequential(stages)
     self.head = PAAHead(cfg)
     self.paa_loss = PAALoss(cfg)
     self.anchor_generator = AnchorGenerator(cfg)
Exemple #4
0
def main(args):
    """Train a ResNet-50 classifier on an image-folder dataset.

    Parameters
    ----------
    args : argparse.Namespace
        Needs trainning_data_dir, validation_data_dir, batch_size,
        saved_model, lr, save_path, num_epochs.
    """
    cudnn.enabled = True

    print('Loading data.')

    transformations = transforms.Compose([
        transforms.Resize(240),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        # ImageNet channel statistics.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    train_x, train_y, classes_names = get_dataset(args.trainning_data_dir)
    test_x, test_y, _ = get_dataset(args.validation_data_dir)
    num_classes = len(classes_names)

    trainning_dataset = DataWrapper(train_x, train_y, transformations)
    eval_dataset = DataWrapper(test_x, test_y, transformations)

    train_loader = torch.utils.data.DataLoader(dataset=trainning_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=16)

    eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=16)
    n = len(trainning_dataset)
    print(n)

    # ResNet50 structure
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          num_classes)
    if args.saved_model:
        print('Loading model.')
        saved_state_dict = torch.load(args.saved_model)

        # 'origin model from pytorch'
        if 'resnet' in args.saved_model:
            load_filtered_state_dict(model,
                                     saved_state_dict,
                                     ignore_layer=[],
                                     reverse=False)
        else:
            load_filtered_state_dict(model, saved_state_dict, ignore_layer=[])

    crossEntropyLoss = nn.CrossEntropyLoss().cuda()
    # (removed an unused `softmax` local that was never passed anywhere)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # multi-gpu; NOTE(review): device ids are hard-coded -- consider args.gpu
    model = nn.DataParallel(model, device_ids=[0, 1])
    model.cuda()

    Save_model = SaveBestModel(save_dir=args.save_path)
    Writer = SummaryWriter()
    step = 0
    for epoch in range(args.num_epochs):
        # Evaluate first so epoch 0 records the pre-training baseline.
        evaluate(eval_loader, model, Writer, step, Save_model, epoch)
        step = train(train_loader, model, crossEntropyLoss, optimizer, Writer,
                     args.batch_size, epoch, step, n)
Exemple #5
0
def eval():
    """Evaluate the Places365 classifier over the validation set.

    Builds the graph, restores weights from FLAGS-specified checkpoints and
    loops over the validation data, printing running loss/precision.

    Returns
    -------
    tuple of (float, float)
        Average loss and average precision over all iterations.

    NOTE(review): the name shadows the builtin ``eval``; kept unchanged so
    existing callers keep working.
    """
    with tf.variable_scope(FLAGS.resnet):
        images, labels, num_classes = dataset_reader.build_input(
            FLAGS.test_batch_size,
            'val',
            dataset='places365',
            color_switch=FLAGS.color_switch,
            blur=0,
            multicrops_for_eval=FLAGS.test_with_multicrops)
        model = resnet.ResNet(num_classes,
                              None,
                              None,
                              None,
                              resnet=FLAGS.resnet,
                              mode='test',
                              float_type=tf.float32)
        logits = model.inference(images)
        model.compute_loss(labels + FLAGS.labels_offset, logits)

    precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                labels + FLAGS.labels_offset, 1)
    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))
    if FLAGS.test_with_multicrops == 1:
        # Multi-crop mode: average the predictions over all crops of the
        # single image before the top-1 check.
        precisions = tf.nn.in_top_k(
            [tf.reduce_mean(model.predictions, axis=[0])], [labels[0]], 1)
        precision_op = tf.cast(precisions, tf.float32)
    # ========================= end of building model ================================

    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    if FLAGS.pre_trained_filename is not None and FLAGS.finetuned_filename is not None:
        # Restore the backbone and the final logits layer from two
        # different checkpoints.
        last_layer_variables = []
        finetuned_variables = []
        for v in tf.global_variables():
            if 'Momentum' in v.name:
                continue
            if v.name.find('logits') > 0:
                last_layer_variables.append(v)
                print('last layer\'s variables: %s' % v.name)
                continue

            print('finetuned variables:', v.name)
            finetuned_variables.append(v)

        loader1 = tf.train.Saver(var_list=finetuned_variables)
        loader1.restore(sess, FLAGS.finetuned_filename)

        loader2 = tf.train.Saver(var_list=last_layer_variables)
        loader2.restore(sess, FLAGS.pre_trained_filename)

        print('Succesfully loaded model from %s and %s.' %
              (FLAGS.finetuned_filename, FLAGS.pre_trained_filename))
    elif FLAGS.pre_trained_filename is not None:
        loader = tf.train.Saver()
        loader.restore(sess, FLAGS.pre_trained_filename)

        print('Succesfully loaded model from %s.' % FLAGS.pre_trained_filename)
    else:
        print('No models loaded...')

    print(
        '======================= eval process begins ========================='
    )
    average_loss = 0.0
    average_precision = 0.0
    if FLAGS.test_max_iter is None:
        max_iter = dataset_reader.num_per_epoche(
            'eval', 'places365') // FLAGS.test_batch_size
    else:
        max_iter = FLAGS.test_max_iter

    if FLAGS.test_with_multicrops == 1:
        max_iter = dataset_reader.num_per_epoche('eval', 'places365')

    step = 0
    while step < max_iter:
        step += 1
        loss, precision = sess.run([model.loss, precision_op])

        average_loss += loss
        average_precision += precision
        # BUG FIX: the original `if step % 100 ... elif step % 10` branches
        # printed the identical message, so a single modulo-10 test is
        # equivalent and simpler.
        if step % 10 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)

        # 10 / 365 : 1.82831767797 0.541999986768
        # 100 / 365 : 1.82069332361 0.538299981654
        # 200 / 365 : 1.83346488535 0.537949982285
        # 300 / 365 : 1.82938882232 0.537733315627
        # 360 / 365 : 1.83265232974 0.536805537095
        # 2018-04-05 17:05:19.543344 18642] Step 0 Test
        #      loss = 1.8364, precision = 0.5362
        # 2018-04-05 17:42:57.255770 26263] Step 0 Test (Multi-crops)
        #      loss = 1.8639, precision = 0.5472

    coord.request_stop()
    coord.join(threads)

    return average_loss / max_iter, average_precision / max_iter
Exemple #6
0
def main(args):
    """Classify every jpg/jpeg/png image in a directory in mini-batches,
    printing the predicted class and optionally writing annotated copies.

    Parameters
    ----------
    args : argparse.Namespace
        Needs num_classes, image_size, gpu, saved_model, test_data_dir,
        batch_size, save_path.
    """
    # ResNet-50 layout: Bottleneck blocks [3, 4, 6, 3].
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          args.num_classes)
    #model = dpn92(num_classes=args.num_classes)

    transformations = transforms.Compose([
        transforms.Resize((args.image_size, args.image_size)),
        transforms.ToTensor()
    ])

    if args.gpu[0] >= 0:
        cudnn.enabled = True
        # dim=1: normalise over the class dimension (dim-less Softmax is
        # deprecated).
        softmax = nn.Softmax(dim=1).cuda()
        model.cuda()
        saved_state_dict = torch.load(args.saved_model)
    else:
        softmax = nn.Softmax(dim=1)
        saved_state_dict = torch.load(args.saved_model, map_location='cpu')

    load_filtered_state_dict(model,
                             saved_state_dict,
                             ignore_layer=[],
                             reverse=True,
                             gpu=cudnn.enabled)
    # BUG FIX: switch BatchNorm/Dropout to inference mode.
    model.eval()

    imgs_path = glob.glob(os.path.join(args.test_data_dir, '*.jpg'))
    imgs_path += glob.glob(os.path.join(args.test_data_dir, '*.jpeg'))
    imgs_path += glob.glob(os.path.join(args.test_data_dir, '*.png'))

    # BUG FIX: Py2 `xrange` and true division `/` broke the batch count
    # under Python 3; use range with floor division.
    num_batches = (len(imgs_path) + args.batch_size - 1) // args.batch_size
    for i in range(num_batches):
        # BUG FIX: slice the paths for THIS batch. The original clamped with
        # min(batch_size, len(imgs_path)) and indexed past the end of the
        # list on the last partial batch; it also fed uninitialised tensor
        # rows to the model.
        batch_paths = imgs_path[i * args.batch_size:(i + 1) * args.batch_size]
        imgs = torch.FloatTensor(len(batch_paths), 3, args.image_size,
                                 args.image_size)
        if args.gpu[0] >= 0:
            imgs = imgs.cuda()
        for j, path in enumerate(batch_paths):
            img = Image.open(path).convert("RGB")
            imgs[j] = transformations(img)

        with torch.no_grad():
            pred = softmax(model(imgs))
        _, pred_1 = pred.topk(1, 1, True, True)

        for j, path in enumerate(batch_paths):
            # BUG FIX: index by j, not [0][0] — the original printed the
            # first image's class for every image in the batch.
            c = default_class[pred_1.cpu().numpy()[j][0]]
            print("{} -- {} {}".format(path, pred_1[j], c))

            if args.save_path:
                img_numpy = imgs[j].cpu().numpy() * 255
                # change to channel last
                img_numpy = np.transpose(img_numpy, (1, 2, 0)).astype(np.uint8)
                # rgb to bgr for OpenCV
                img_numpy = img_numpy[..., ::-1].copy()

                cv2.putText(img_numpy, c, (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 255, 255), 1, cv2.LINE_AA)

                save_path = os.path.join(args.save_path,
                                         os.path.basename(path))
                cv2.imwrite(save_path, img_numpy)
Exemple #7
0
def main():
    """Train or evaluate an OCR character classifier, driven by module-level
    ``args``.

    ``args.phase`` selects the mode:
      - 'test' / 'val' / 'train_val': run inference over the chosen split(s).
      - 'train': train, alternating the two input-size datasets and
        validating each epoch.
    """
    word_index_dict = json.load(open(args.word_index_json))  # load label info (word -> index)
    num_classes = len(word_index_dict)  # number of classes for the multi-class problem; num_classes == 1823
    image_label_dict = json.load(open(args.image_label_json))

    cudnn.benchmark = True
    if args.model == 'densenet':
        # 2000+ character categories, multi-label classification
        model = DenseNet121(num_classes).cuda()
    elif args.model == 'resnet':
        # resnet is mainly used for text-region segmentation and object detection
        model = resnet.ResNet(num_classes=num_classes, args=args).cuda()
    else:
        return
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)  # initial learning rate is 0.001
    # model = torch.nn.DataParallel(model).cuda()
    loss = Loss().cuda()

    # Resume from a checkpoint if given; otherwise start fresh (resnet gets
    # a later start_epoch — presumably to skip its pretraining phase, TODO confirm).
    if args.resume:
        state_dict = torch.load(args.resume)
        model.load_state_dict(state_dict['state_dict'])
        best_f1score = state_dict['f1score']
        start_epoch = state_dict['epoch'] + 1
    else:
        best_f1score = 0
        if args.model == 'resnet':
            start_epoch = 100
        else:
            start_epoch = 1
    args.epoch = start_epoch
    print('best_f1score', best_f1score)

    # split the dataset
    test_filelist = sorted(glob(os.path.join(args.data_dir, 'test',
                                             '*')))  # 5664+
    trainval_filelist = sorted(glob(os.path.join(args.data_dir, 'train',
                                                 '*')))  # 13318

    # Two input sizes for training:
    # train_filelist1: images with aspect ratio below 8:1, padded to 64*512 inputs
    # train_filelist2: images with aspect ratio above 8:1, padded and cropped to 64*1024 inputs
    train_filelist1, train_filelist2 = [], []

    # blacklist: the labels of these images are known to be wrong
    black_list = set(json.load(open(args.black_json))['black_list'])
    image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json))
    for f in trainval_filelist:
        image = f.split('/')[-1]
        if image in black_list:
            continue
        r = image_hw_ratio_dict[image]
        if r == 0:
            train_filelist1.append(f)
        else:
            train_filelist2.append(f)
    train_val_filelist = train_filelist1 + train_filelist2
    # Last 2048 ratio<8:1 images become the validation split.
    val_filelist = train_filelist1[-2048:]
    train_filelist1 = train_filelist1[:-2048]

    train_filelist2 = train_filelist2  # no-op; kept as-is
    image_size = [512, 64]

    if args.phase in ['test', 'val', 'train_val']:
        # test: output the text-detection results
        test_dataset = dataloader.DataSet(
            test_filelist,
            image_label_dict,
            num_classes,
            # transform=train_transform,
            args=args,
            image_size=image_size,
            phase='test')
        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=8,
                                 pin_memory=True)
        train_filelist = train_filelist1[-2048:]
        train_dataset = dataloader.DataSet(train_filelist,
                                           image_label_dict,
                                           num_classes,
                                           image_size=image_size,
                                           args=args,
                                           phase='test')
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=8,
                                  pin_memory=True)

        val_dataset = dataloader.DataSet(val_filelist,
                                         image_label_dict,
                                         num_classes,
                                         image_size=image_size,
                                         args=args,
                                         phase='test')
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=1,
                                shuffle=False,
                                num_workers=8,
                                pin_memory=True)

        train_val_dataset = dataloader.DataSet(train_val_filelist,
                                               image_label_dict,
                                               num_classes,
                                               image_size=image_size,
                                               args=args,
                                               phase='test')
        train_val_loader = DataLoader(dataset=train_val_dataset,
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=8,
                                      pin_memory=True)

        if args.phase == 'test':
            # test(start_epoch - 1, model, val_loader, 'val')
            test(start_epoch - 1, model, test_loader, 'test')
            # test(start_epoch - 1, model, train_val_loader, 'train_val')
        elif args.phase == 'val':
            test(start_epoch - 1, model, train_loader, 'train')
            test(start_epoch - 1, model, val_loader, 'val')
        elif args.phase == 'train_val':
            test(start_epoch - 1, model, train_val_loader, 'train_val')
        return

    elif args.phase == 'train':

        train_dataset1 = dataloader.DataSet(train_filelist1,
                                            image_label_dict,
                                            num_classes,
                                            image_size=image_size,
                                            args=args,
                                            phase='train')
        # images with aspect ratio below 8:1, padded to 64*512 inputs
        train_loader1 = DataLoader(dataset=train_dataset1,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=8,
                                   pin_memory=True)
        train_dataset2 = dataloader.DataSet(train_filelist2,
                                            image_label_dict,
                                            num_classes,
                                            image_size=(1024, 64),
                                            args=args,
                                            phase='train')
        train_loader2 = DataLoader(dataset=train_dataset2,
                                   batch_size=args.batch_size // 2,
                                   shuffle=True,
                                   num_workers=8,
                                   pin_memory=True)
        val_dataset = dataloader.DataSet(val_filelist,
                                         image_label_dict,
                                         num_classes,
                                         image_size=image_size,
                                         args=args,
                                         phase='val')
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=min(8, args.batch_size),
                                shuffle=False,
                                num_workers=8,
                                pin_memory=True)
        # Synthetic pretraining data rendered over background images.
        filelist = glob(os.path.join(args.bg_dir, '*'))
        pretrain_dataset1 = dataloader.DataSet(filelist,
                                               image_label_dict,
                                               num_classes,
                                               image_size=args.image_size,
                                               word_index_dict=word_index_dict,
                                               args=args,
                                               font_range=[8, 32],
                                               margin=10,
                                               rotate_range=[-10., 10.],
                                               phase='pretrain')
        pretrain_loader1 = DataLoader(dataset=pretrain_dataset1,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=8,
                                      pin_memory=True)
        pretrain_dataset2 = dataloader.DataSet(filelist,
                                               image_label_dict,
                                               num_classes,
                                               image_size=(256, 128),
                                               word_index_dict=word_index_dict,
                                               args=args,
                                               font_range=[24, 64],
                                               margin=20,
                                               rotate_range=[-20., 20.],
                                               phase='pretrain')
        pretrain_loader2 = DataLoader(dataset=pretrain_dataset2,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=8,
                                      pin_memory=True)

        best_f1score = 0
        # eval_mode = 'pretrain-2'
        eval_mode = 'eval'
        for epoch in range(start_epoch, args.epochs):

            args.epoch = epoch

            # Once validation F1 passes 0.9, lower the LR and enable hard
            # example mining.
            if eval_mode == 'eval':
                if best_f1score > 0.9:
                    args.lr = 0.0001
                if best_f1score > 0.9:
                    args.hard_mining = 1

            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

            train_eval(epoch, model, train_loader1, loss, optimizer, 2.,
                       'train-1')
            # The wide-image loader is only used once the model is good
            # enough on the standard-size images.
            if best_f1score > 0.9:
                train_eval(epoch, model, train_loader2, loss, optimizer, 2.,
                           'train-2')
            best_f1score = train_eval(
                epoch, model, val_loader, loss, optimizer, best_f1score,
                'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining))
            continue
            '''
Exemple #8
0
def eval():
    """Evaluate the ImageNet classifier over the validation set.

    Builds the graph, restores weights from FLAGS-specified checkpoints and
    loops over the validation data, printing running loss/precision.

    Returns
    -------
    tuple of (float, float)
        Average loss and average top-k precision over all iterations.

    NOTE(review): the name shadows the builtin ``eval``; kept unchanged so
    existing callers keep working.
    """
    with tf.variable_scope(FLAGS.resnet):
        images, labels, _ = dataset_reader.build_input(
            FLAGS.test_batch_size,
            'val',
            dataset='imagenet',
            blur=0,
            resize_image=FLAGS.resize_image,
            color_switch=FLAGS.color_switch)
        model = resnet.ResNet(FLAGS.num_classes,
                              None,
                              None,
                              None,
                              resnet=FLAGS.resnet,
                              mode=FLAGS.mode,
                              float_type=tf.float32)
        logits = model.inference(images)
        model.compute_loss(labels + FLAGS.labels_offset, logits)
        precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                    labels + FLAGS.labels_offset, FLAGS.top_k)

    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))
    # ========================= end of building model ================================

    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    if FLAGS.pre_trained_filename is not None and FLAGS.finetuned_filename is not None:
        # Restore the backbone and the final logits layer from two
        # different checkpoints.
        last_layer_variables = []
        finetuned_variables = []
        for v in tf.global_variables():
            if 'Momentum' in v.name:
                continue
            if v.name.find('logits') > 0:
                last_layer_variables.append(v)
                print('last layer\'s variables: %s' % v.name)
                continue

            print('finetuned variables:', v.name)
            finetuned_variables.append(v)

        loader1 = tf.train.Saver(var_list=finetuned_variables)
        loader1.restore(sess, FLAGS.finetuned_filename)

        loader2 = tf.train.Saver(var_list=last_layer_variables)
        loader2.restore(sess, FLAGS.pre_trained_filename)

        print('Succesfully loaded model from %s and %s.' %
              (FLAGS.finetuned_filename, FLAGS.pre_trained_filename))
    elif FLAGS.pre_trained_filename is not None:
        loader = tf.train.Saver()
        loader.restore(sess, FLAGS.pre_trained_filename)

        print('Succesfully loaded model from %s.' % FLAGS.pre_trained_filename)
    else:
        print('No models loaded...')

    print(
        '======================= eval process begins ========================='
    )
    average_loss = 0.0
    average_precision = 0.0
    if FLAGS.test_max_iter is None:
        max_iter = dataset_reader.num_per_epoche(
            'eval', 'imagenet') // FLAGS.test_batch_size
    else:
        max_iter = FLAGS.test_max_iter

    step = 0
    while step < max_iter:
        step += 1
        loss, precision = sess.run([model.loss, precision_op])

        average_loss += loss
        average_precision += precision
        # BUG FIX: the original `if step % 100 ... elif step % 10` branches
        # printed the identical message, so a single modulo-10 test is
        # equivalent and simpler.
        if step % 10 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)

        # batch size = 100, resnet_v1_101:
        # 10 / 500 : 1.05231621861 0.766999977827
        # 500 / 500 : 0.992528787792 0.763559982777

        # batch size = 10, resnet_v1_101:
        # 10 / 5000 : 1.04412123561 0.769999998808
        # 5000 / 5000 : 0.992528784394 0.763559984207

        # batch size = 100, resnet_v1_152:
        # 10 / 500 : 0.894700920582 0.776999986172
        # 500 / 500 : 0.974335199773 0.7680999825

        # batch size = 100, resnet_v1_50:
        # 10 / 500 : 0.89778097868 0.774999976158
        # 500 / 500 : 1.04086481136 0.752079983711

        # https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models

    coord.request_stop()
    coord.join(threads)

    return average_loss / max_iter, average_precision / max_iter
Exemple #9
0
import glob
import os
import cv2
import numpy as np

# hyperparameters
# Class labels emitted by the classifier; order must match the labels the
# checkpoint was trained with.
default_class = ['drawings', 'hentai', 'neutral', 'p**n', 'sexy']

USE_GPU = True
NUM_CLASSES = len(default_class)
IMAGE_SIZE = 299
# NOTE(review): this filename matches the torchvision ImageNet ResNet-50
# checkpoint naming — confirm it is actually a 5-class fine-tuned model.
MODEL_PATH = "./models/resnet50-19c8e357.pth"

# model loading
print("model loading start")
# ResNet-50 layout: Bottleneck blocks [3, 4, 6, 3].
model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                      NUM_CLASSES)
model.eval()
if USE_GPU:
    cudnn.enabled = True
    softmax = nn.Softmax().cuda()
    model.cuda()
    saved_state_dict = torch.load(MODEL_PATH)
else:
    softmax = nn.Softmax()
    saved_state_dict = torch.load(MODEL_PATH, map_location='cpu')
# Copy matching layers from the checkpoint into the model (project helper).
load_filtered_state_dict(model,
                         saved_state_dict,
                         ignore_layer=[],
                         reverse=False,
                         gpu=cudnn.enabled)
Exemple #10
0
def eval(i_ckpt):
    """Evaluate a checkpoint on the 'val' split and report loss/precision.

    NOTE(review): the name shadows the builtin ``eval``; kept as-is for
    backward compatibility with existing callers.

    Args:
        i_ckpt: Path to a TF checkpoint (e.g. ``.../model.ckpt-3000``), or
            None to skip restoring (variables are then left uninitialized —
            presumably callers always pass a checkpoint; verify).

    Returns:
        Tuple ``(average_loss, average_precision)`` averaged over
        ``max_iter`` evaluation steps.
    """
    # Start from a clean graph so repeated calls don't accumulate ops.
    tf.reset_default_graph()

    # Select the compute dtype from the flag (16 -> half precision).
    print('================', end='')
    if FLAGS.data_type == 16:
        print('using tf.float16 =====================')
        data_type = tf.float16
    else:
        print('using tf.float32 =====================')
        data_type = tf.float32

    # Build the input pipeline and the eval-mode model under a variable
    # scope named after the resnet variant, so checkpoint names line up.
    with tf.variable_scope(FLAGS.resnet):
        images, labels, num_classes = dataset_reader.build_input(
            FLAGS.test_batch_size,
            'val',
            crop_size=FLAGS.test_crop_size,
            dataset=FLAGS.database,
            color_switch=FLAGS.color_switch,
            blur=0,
            resize_image=FLAGS.resize_image,
            multicrops_for_eval=FLAGS.test_with_multicrops)
        model = resnet.ResNet(num_classes,
                              None,
                              None,
                              None,
                              mode='eval',
                              bn_epsilon=FLAGS.epsilon,
                              norm_only=FLAGS.norm_only,
                              resnet=FLAGS.resnet,
                              float_type=data_type)
        logits = model.inference(images)
        model.compute_loss(labels, logits)

    # Per-batch top-1 precision op (cast to float32 for in_top_k).
    precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                model.labels, 1)
    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))

    # Multicrop mode: all crops in the batch belong to one image, so
    # average the predictions over crops and score against that single
    # label. This deliberately overwrites the ops built just above.
    if FLAGS.test_with_multicrops == 1:
        precisions = tf.nn.in_top_k(
            [tf.reduce_mean(model.predictions, axis=[0])], [labels[0]], 1)
        precision_op = tf.cast(precisions, tf.float32)

    # ========================= end of building model ================================

    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)

    # The input pipeline uses queue runners; they must be started (and
    # later joined) around the evaluation loop.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    if i_ckpt is not None:
        loader = tf.train.Saver(max_to_keep=0)
        loader.restore(sess, i_ckpt)
        # Checkpoint files end in '-<global_step>'; recover it for logging.
        eval_step = i_ckpt.split('-')[-1]
        print('Succesfully loaded model from %s at step=%s.' %
              (i_ckpt, eval_step))

    print(
        '======================= eval process begins ========================='
    )
    average_loss = 0.0
    average_precision = 0.0
    # Number of batches: one epoch by default, overridable by flag.
    if FLAGS.test_max_iter is None:
        max_iter = dataset_reader.num_per_epoche(
            'eval', FLAGS.database) // FLAGS.test_batch_size
    else:
        max_iter = FLAGS.test_max_iter

    # With multicrops each iteration consumes one image (all its crops),
    # so iterate once per image rather than once per batch.
    if FLAGS.test_with_multicrops == 1:
        max_iter = dataset_reader.num_per_epoche('eval', FLAGS.database)

    step = 0
    while step < max_iter:
        step += 1
        loss, precision = sess.run([model.loss, precision_op])

        average_loss += loss
        average_precision += precision
        # Progress report with running averages every 10 steps.
        if step % 10 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)

    # Shut the input queues down cleanly before returning.
    coord.request_stop()
    coord.join(threads)

    return average_loss / max_iter, average_precision / max_iter
# ---- Exemple #11 (score: 0) ----
def train(resume_step=None):
    """Train the ResNet model, with periodic snapshots and CSV logging.

    Builds the training graph, optionally restores either a fine-tune
    checkpoint or a previous snapshot, then runs the optimization loop
    with a flag-selected learning-rate policy.

    Args:
        resume_step: If given, restore ``model.ckpt-<resume_step>`` from
            the snapshot dir and continue counting steps from there;
            otherwise optionally warm-start from
            ``FLAGS.fine_tune_filename``.

    Returns:
        Tuple ``(f_log, logdir)``: the still-open log file handle
        (returned for a subsequent eval to append to) and the LogDir
        helper describing the experiment directories.
    """
    # Non-trainable step counter living in the graph (not fed manually).
    global_step = tf.get_variable('global_step', [],
                                  dtype=tf.int64,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    # Select the compute dtype from the flag (16 -> half precision).
    print('================', end='')
    if FLAGS.data_type == 16:
        print('using tf.float16 =====================')
        data_type = tf.float16
    else:
        print('using tf.float32 =====================')
        data_type = tf.float32

    # Scalar placeholders: weight-decay rates and learning rate are fed
    # each step so the schedules can be computed in Python below.
    wd_rate_ph = tf.placeholder(data_type, shape=())
    wd_rate2_ph = tf.placeholder(data_type, shape=())
    lrn_rate_ph = tf.placeholder(data_type, shape=())

    # Build the input pipeline and train-mode model under a scope named
    # after the resnet variant so checkpoint variable names line up.
    with tf.variable_scope(FLAGS.resnet):
        images, labels, num_classes = dataset_reader.build_input(
            FLAGS.batch_size,
            'train',
            examples_per_class=FLAGS.examples_per_class,
            dataset=FLAGS.database,
            resize_image=FLAGS.resize_image,
            color_switch=FLAGS.color_switch,
            blur=FLAGS.blur)
        model = resnet.ResNet(num_classes,
                              lrn_rate_ph,
                              wd_rate_ph,
                              wd_rate2_ph,
                              optimizer=FLAGS.optimizer,
                              mode='train',
                              bn_epsilon=FLAGS.epsilon,
                              resnet=FLAGS.resnet,
                              norm_only=FLAGS.norm_only,
                              initializer=FLAGS.initializer,
                              fix_blocks=FLAGS.fix_blocks,
                              fine_tune_filename=FLAGS.fine_tune_filename,
                              bn_ema=FLAGS.ema_decay,
                              wd_mode=FLAGS.weight_decay_mode,
                              fisher_filename=FLAGS.fisher_filename,
                              gpu_num=FLAGS.gpu_num,
                              fisher_epsilon=FLAGS.fisher_epsilon,
                              float_type=data_type,
                              separate_regularization=FLAGS.separate_reg)
        model.inference(images)
        model.build_train_op(labels)

    # Count trainable parameters (product of each variable's shape).
    names = []
    num_params = 0
    for v in tf.trainable_variables():
        # print v.name
        names.append(v.name)
        num = 1
        for i in v.get_shape().as_list():
            num *= i
        num_params += num
    print("Trainable parameters' num: %d" % num_params)

    # Top-1 training precision on the current batch.
    precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                model.labels, 1)
    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))
    # ========================= end of building model ================================

    step = 0
    saver = tf.train.Saver(max_to_keep=0)
    # Create the experiment directory tree (log/database/exp/snapshot).
    logdir = LogDir(FLAGS.database, FLAGS.log_dir, FLAGS.weight_decay_mode)
    logdir.print_all_info()
    if not os.path.exists(logdir.log_dir):
        print('creating ', logdir.log_dir, '...')
        os.mkdir(logdir.log_dir)
    if not os.path.exists(logdir.database_dir):
        print('creating ', logdir.database_dir, '...')
        os.mkdir(logdir.database_dir)
    if not os.path.exists(logdir.exp_dir):
        print('creating ', logdir.exp_dir, '...')
        os.mkdir(logdir.exp_dir)
    if not os.path.exists(logdir.snapshot_dir):
        print('creating ', logdir.snapshot_dir, '...')
        os.mkdir(logdir.snapshot_dir)

    init = [
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    ]

    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    sess.run(init)

    # Start the input-pipeline queue runners; joined at the end.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # When blocks are frozen, also import non-trainable variables
    # (e.g. their BN statistics) from the fine-tune checkpoint.
    import_variables = tf.trainable_variables()
    if FLAGS.fix_blocks > 0:
        import_variables = tf.global_variables()

    if FLAGS.fine_tune_filename is not None and resume_step is None:
        # Warm start: load everything except freshly added layers and
        # optimizer (Momentum) slots, which don't exist in the checkpoint.
        fine_tune_variables = []
        new_layers_names = model.new_layers_names
        new_layers_names.append('Momentum')
        for v in import_variables:
            if any(elem in v.name for elem in new_layers_names):
                print('not loading %s' % v.name)
                continue
            fine_tune_variables.append(v)

        loader = tf.train.Saver(var_list=fine_tune_variables)
        loader.restore(sess, FLAGS.fine_tune_filename)
        print('Succesfully loaded fine-tune model from %s.' %
              FLAGS.fine_tune_filename)
    elif resume_step is not None:
        # Resume: restore the full snapshot and continue the step count.
        # ./snapshot/model.ckpt-3000
        i_ckpt = logdir.snapshot_dir + '/model.ckpt-%d' % resume_step
        saver.restore(sess, i_ckpt)

        step = resume_step
        print('Succesfully loaded model from %s at step=%s.' %
              (i_ckpt, resume_step))
    else:
        print('Not import any model.')

    print(
        '=========================== training process begins ================================='
    )
    # CSV log named by wall-clock start time; flushed every report period.
    f_log = open(logdir.exp_dir + '/' + str(datetime.datetime.now()) + '.txt',
                 'w')
    f_log.write('step,loss,precision,wd\n')
    f_log.write(sorted_str_dict(FLAGS.__dict__) + '\n')

    average_loss = 0.0
    average_precision = 0.0
    show_period = 20
    snapshot = FLAGS.snapshot
    max_iter = FLAGS.train_max_iter
    lrn_rate = FLAGS.lrn_rate

    # Parse the comma-separated step-decay milestones, if any.
    lr_step = []
    if FLAGS.lr_step is not None:
        temps = FLAGS.lr_step.split(',')
        for t in temps:
            lr_step.append(int(t))

    t0 = None
    wd_rate = FLAGS.weight_decay_rate
    wd_rate2 = FLAGS.weight_decay_rate2
    while step < max_iter + 1:
        step += 1

        # Learning-rate schedule, computed in Python and fed each step:
        # 'step' multiplies by step_size at each milestone; 'poly' decays
        # with power 0.9; 'linear' is lr/step; anything else is constant.
        if FLAGS.lr_policy == 'step':
            if len(lr_step) > 0 and step == lr_step[0]:
                lrn_rate *= FLAGS.step_size
                lr_step.remove(step)
        elif FLAGS.lr_policy == 'poly':
            lrn_rate = ((1 - 1.0 *
                         (step - 1) / max_iter)**0.9) * FLAGS.lrn_rate
        elif FLAGS.lr_policy == 'linear':
            lrn_rate = FLAGS.lrn_rate / step
        else:
            lrn_rate = FLAGS.lrn_rate

        _, loss, wd, precision = sess.run(
            [model.train_op, model.loss, model.wd, precision_op],
            feed_dict={
                lrn_rate_ph: lrn_rate,
                wd_rate_ph: wd_rate,
                wd_rate2_ph: wd_rate2
            })

        average_loss += loss
        average_precision += precision

        # Snapshot every `snapshot` steps (or every step if the
        # save_first_iteration flag is set).
        if FLAGS.save_first_iteration == 1 or step % snapshot == 0:
            saver.save(sess,
                       logdir.snapshot_dir + '/model.ckpt',
                       global_step=step)

        if step % show_period == 0:
            left_hours = 0

            # Estimate remaining time from the duration of the last
            # show_period steps.
            if t0 is not None:
                delta_t = (datetime.datetime.now() - t0).seconds
                left_time = (max_iter - step) / show_period * delta_t
                left_hours = left_time / 3600.0

            t0 = datetime.datetime.now()

            average_loss /= show_period
            average_precision /= show_period

            # NOTE(review): step is incremented before this check, so
            # step == 0 appears unreachable here; defensive undo of the
            # division above — confirm before removing.
            if step == 0:
                average_loss *= show_period
                average_precision *= show_period

            f_log.write('%d,%f,%f,%f\n' %
                        (step, average_loss, average_precision, wd))
            f_log.flush()

            print('%s %s] Step %s, lr = %f, wd_rate = %f, wd_rate_2 = %f ' \
                  % (str(datetime.datetime.now()), str(os.getpid()), step, lrn_rate, wd_rate, wd_rate2))
            print('\t loss = %.4f, precision = %.4f, wd = %.4f' %
                  (average_loss, average_precision, wd))
            print('\t estimated time left: %.1f hours. %d/%d' %
                  (left_hours, step, max_iter))

            average_loss = 0.0
            average_precision = 0.0

    # Shut the input queues down cleanly before returning.
    coord.request_stop()
    coord.join(threads)

    return f_log, logdir  # f_log returned for eval.
import torch as t
import torchvision as tv
from data import get_train_dataset, get_validation_dataset
from stopping import EarlyStoppingCallback
from trainer import Trainer
from matplotlib import pyplot as plt
import numpy as np
from model import resnet

# Data pipelines: wrap the project's train/validation datasets in
# DataLoaders (implementations live in data.py).
train_dl = t.utils.data.DataLoader(get_train_dataset(), batch_size=50)
test_dl = t.utils.data.DataLoader(get_validation_dataset(), batch_size=20)

# Model under training.
model = resnet.ResNet()

# Multi-label objective; plain SGD with momentum as the optimizer.
loss = t.nn.MultiLabelSoftMarginLoss()
optimizer = t.optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# Stop once validation has not improved for 5 checks (stopping.py),
# then wire everything into the Trainer.
early_stopping = EarlyStoppingCallback(5)
trainer = Trainer(model,
                  loss,
                  optim=optimizer,
                  train_dl=train_dl,
                  val_test_dl=test_dl,
                  early_stopping_cb=early_stopping,
                  cuda=True)

# Kick off the training loop.
res = trainer.fit()