Example #1
File: train.py Project: muvanpersie/r_yolo
import torch
import torch.optim as optim
from torch.utils.data import DataLoader

# Project-local names used below (assumed): VideoDataset, make_batch,
# RYOLONet and train.


def main():
    # parser = argparse.ArgumentParser()
    # parser.add_argument("--epochs", type=int, default=20, help="number of epochs")
    # parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch")
    # parser.add_argument("--pretrained_weights", type=str, help="if specified starts from checkpoint model")
    # parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
    # parser.add_argument("--img_width", type=int, default=512, help="width of each image")
    # parser.add_argument("--img_height", type=int, default=512, help="height of each image")
    # parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights")
    # parser.add_argument("--evaluation_interval", type=int, default=1, help="interval evaluations on validation set")
    # parser.add_argument("--multiscale_training", default=True, help="allow for multi-scale training")
    # parser.add_argument("--anchors", type=str, default="./anchors.txt", help="file define anchors")
    # args = parser.parse_args()

    num_class = 7
    anchors = []
    # The with-block closes the file on exit; the original's trailing
    # f.close() was redundant and has been dropped.
    with open("anchors.txt", "r") as f:
        for line in f:
            ss = line.split()
            anchors.append([float(x) for x in ss])

    root_path = "/home/lance/data/DataSets/detection/JPEGImages"
    dataset = VideoDataset(root_path, num_class = num_class, anchors = anchors,
                                 input_width = 416, input_height = 416, scales = [8, 16, 32])
    
    dataloader = DataLoader(dataset, num_workers=0, collate_fn=make_batch,
                             pin_memory=True, batch_size=4, shuffle=True)

    net = RYOLONet(num_class = num_class)
    net = torch.nn.DataParallel(net).cuda()

    optimizer = optim.SGD(net.parameters(), lr=0.006, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)


    train(dataloader, net, optimizer, lr_scheduler)
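
The snippet above reads anchors.txt as one whitespace-separated list of floats per line. A minimal sketch of that assumed layout and an equivalent compact loader (the numbers are illustrative, not the project's):

# Illustrative anchors.txt layout (one detection scale per line, w h pairs):
#   10 13 16 30 33 23
#   30 61 62 45 59 119
#   116 90 156 198 373 326
def load_anchors(path="anchors.txt"):
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(path) as f:
        return [[float(x) for x in line.split()] for line in f if line.strip()]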
Example #2
    logger = Logger(config)
    search_name = config["model"]["architecture"]
    model = getattr(importlib.import_module(search_name), 'model')(config)
    model.cuda()

    datasets = {}

    dataset_splits = DatasetSplitter.generate_splits(config)
    transformations = TransformsGenerator.get_final_transforms(config)

    for key in dataset_splits:
        path, batching_config, split = dataset_splits[key]
        transform = transformations[key]

        datasets[key] = VideoDataset(path, batching_config, transform, split)

    trainer = Trainer(config, model, datasets["train"], logger)
    evaluator = Evaluator(config,
                          datasets["validation"],
                          logger,
                          action_sampler=None,
                          logger_prefix="validation")

    # Resume from a checkpoint; this entry point requires one, so a failed
    # load is fatal.
    try:
        trainer.load_checkpoint(model)
    except Exception as e:
        logger.print(e)
        logger.print("Cannot play without loading checkpoint")
        exit(1)
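
The dynamic model construction above implies a contract: config["model"]["architecture"] must name an importable module that exposes a callable named model taking the config. A minimal sketch of that contract, with a hypothetical module:

import importlib

# A hypothetical module satisfying the lookup, e.g. models/simple_net.py:
#   import torch.nn as nn
#   def model(config):
#       return nn.Conv3d(3, 16, kernel_size=3)

def build_model(config):
    # Same lookup as the snippet above, isolated for clarity.
    module = importlib.import_module(config["model"]["architecture"])
    return getattr(module, "model")(config)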
Example #3
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from os.path import isdir, isfile, join
from torch.utils.data import DataLoader

# Project-local names used below (assumed): ConvNet, VideoDataset, Logger,
# mkdir_p, train, validate, save_checkpoint and adjust_learing_rate, plus a
# module-level best_acc.


def main(args):
    global best_acc

    if not isdir(args.model):
        mkdir_p(args.model)

    print("==> create model ConvNet")

    # The criterion below lives on the GPU, so move the model there too
    # (assuming train()/validate() do not do this themselves).
    model = ConvNet().cuda()

    criterion = nn.NLLLoss().cuda()
    # criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    title = 'Video-Capture'

    if args.resume:
        if isfile(args.resume):
            print("==> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(join(args.model, 'log.txt'),
                            title=title,
                            resume=True)
        else:
            print("==> checkpoint not found at '{}'".format(args.resume))
            # Fall back to a fresh logger; otherwise `logger` is undefined in
            # the training loop below.
            logger = Logger(join(args.model, 'log.txt'), title=title)
            logger.set_names(
                ['Epoch', 'LR', 'Train Loss', 'Val Loss', 'Train Acc', 'Val Acc'])
    else:
        logger = Logger(join(args.model, 'log.txt'), title=title)
        logger.set_names(
            ['Epoch', 'LR', 'Train Loss', 'Val Loss', 'Train Acc', 'Val Acc'])

    cudnn.benchmark = True
    print('  Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    trainPath = args.training_dataset

    train_loader = DataLoader(VideoDataset(trainPath, 'anno.csv', train=True),
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              pin_memory=True)

    valid_loader = DataLoader(VideoDataset(trainPath, 'anno.csv', train=False),
                              batch_size=args.test_batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              pin_memory=True)

    log_title = 'Frame Info'
    logger_frame = Logger(join(args.model, 'frame_info.txt'), title=log_title)
    logger_frame.set_names(['Epoch', 'Video Id', 'Frame Id', 'Correct Blocks'])

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learing_rate(optimizer, epoch, lr, args.step_epoch)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(model, train_loader, optimizer,
                                      criterion, args.video_threshold,
                                      args.frame_threshold)
        valid_loss, valid_acc, frame_dict = validate(valid_loader, model,
                                                     criterion,
                                                     args.video_threshold,
                                                     args.frame_threshold)

        logger.append(
            [epoch + 1, lr, train_loss, valid_loss, train_acc, valid_acc])

        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict()
            },
            is_best,
            checkpoint=args.model)

        for key, value in frame_dict.items():
            v_id, f_id = key.split('_')
            logger_frame.append([epoch + 1, int(v_id), int(f_id), value])

    logger.close()
    logger_frame.close()
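
The loop above calls adjust_learing_rate (spelling as in the source) but its body is not shown. A plausible step-decay implementation matching the call site (optimizer, epoch, lr, args.step_epoch), assuming a decay factor of 0.1:

def adjust_learing_rate(optimizer, epoch, lr, step_epoch):
    # Hypothetical step decay: scale the LR by 0.1 every step_epoch epochs
    # and push the new value into every parameter group.
    if epoch > 0 and epoch % step_epoch == 0:
        lr *= 0.1
        for group in optimizer.param_groups:
            group['lr'] = lr
    return lr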
Example #4
import argparse
import logging
import random

import torch
import torchvision
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from tqdm import tqdm

# Project-local names used below (assumed): VideoDataset, ImageDataset,
# FastDataLoader, loadVideos, custom_loss, the ColorCNN_* models, train,
# train_classification and the video_evaluation_* helpers.


def main():
    parser = argparse.ArgumentParser()
    # Configure logging first; at the default WARNING level the log.info()
    # calls below would be silently dropped.
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("my-logger")
    log.info("Hello, world")
    log.info("Started Script")

    parser.add_argument("--local_rank", default=0, type=int)
    parser.add_argument("--trainingtype", default="regression")
    parser.add_argument("--epochs", default=20, type=int)
    parser.add_argument("--batchsize", default=128, type=int)
    parser.add_argument("--eval_savepath",
                        default="/network-ceph/pgrundmann/video_evaluations/")
    parser.add_argument("--experiment_name", default="lstm")
    parser.add_argument("--testvideo_filename",
                        default="/network-ceph/pgrundmann/sony.mp4")
    parser.add_argument("--steps_per_epoch", default=-1, type=int)
    parser.add_argument("--no_lstm", default=False, action='store_true')
    parser.add_argument("--stateful", default=False, action='store_true')
    parser.add_argument("--imagenet", default=False, action='store_true')
    args = parser.parse_args()
    log.info(str(args))

    if args.no_lstm:
        print("No LSTM")
    else:
        print("Use LSTM")
    args.use_lstm = not args.no_lstm

    torch.multiprocessing.set_start_method('spawn')
    config_name = args.experiment_name
    writer = SummaryWriter(log_dir="/network-ceph/pgrundmann/video_runs/" +
                           config_name + "/")

    BATCH_SIZE = args.batchsize
    EPOCHS = args.epochs
    LR = 0.0001
    TRAIN_MODE = args.trainingtype

    if args.imagenet:
        composed = transforms.Compose(
            [transforms.Resize(128),
             transforms.RandomCrop(112)])
        imagenet_train = torchvision.datasets.ImageFolder(
            "/network-ceph/pgrundmann/maschinelles_sehen/ImageNet-Datasets-Downloader/imagenet/imagenet_images",
            transform=composed)
        imnet_ds = ImageDataset(imagenet_train,
                                type='training',
                                training=TRAIN_MODE)
        if TRAIN_MODE == 'classification':
            weights_a = torch.tensor(imnet_ds.weights_a,
                                     dtype=torch.float).cuda()
            weights_b = torch.tensor(imnet_ds.weights_b,
                                     dtype=torch.float).cuda()
        loader = FastDataLoader(imnet_ds,
                                shuffle=True,
                                batch_size=args.batchsize,
                                pin_memory=True,
                                num_workers=8)
    else:
        train_video_filenames, _ = loadVideos(
            "/network-ceph/pgrundmann/youtube_processed")
        print("Loaded Video-filenames")
        train_loaders = []
        train_datasets = []
        random.shuffle(train_video_filenames)
        try:
            train_datasets = torch.load(
                "/network-ceph/pgrundmann/maschinelles_sehen/train_datasets.pt"
            )
        except Exception:
            # Cache miss: build the per-video datasets once and save them to
            # the same path the load above reads from (the original saved to a
            # relative path, so the cache was never reused).
            for fname in tqdm(train_video_filenames):
                train_datasets.append(VideoDataset(fname, BATCH_SIZE))
            torch.save(
                train_datasets,
                "/network-ceph/pgrundmann/maschinelles_sehen/train_datasets.pt")

        for ds in train_datasets:
            ds.training_type = TRAIN_MODE
        if TRAIN_MODE == 'classification':
            # calculate the weights for a and b based on imagenet (is faster than to do it on the video-dataset)
            composed = transforms.Compose(
                [transforms.Resize(128),
                 transforms.RandomCrop(112)])
            imagenet_train = torchvision.datasets.ImageFolder(
                "/network-ceph/pgrundmann/maschinelles_sehen/ImageNet-Datasets-Downloader/imagenet/imagenet_images",
                transform=composed)
            imnet_ds = ImageDataset(imagenet_train,
                                    type='training',
                                    training=TRAIN_MODE)
            weights_a = torch.tensor(imnet_ds.weights_a,
                                     dtype=torch.float).cuda()
            weights_b = torch.tensor(imnet_ds.weights_b,
                                     dtype=torch.float).cuda()

        ds = torch.utils.data.ConcatDataset(train_datasets)
        loader = torch.utils.data.DataLoader(ds,
                                             batch_size=None,
                                             num_workers=4,
                                             pin_memory=True)

    print("Loaded dataloaders and datasets")

    if TRAIN_MODE == 'regression':
        criterion = torch.nn.MSELoss()
    else:
        criterion = custom_loss

    if TRAIN_MODE == 'classification':
        if args.use_lstm:
            if args.stateful:
                model = ColorCNN_CLASS_LSTM_STATEFUL()
            else:
                model = ColorCNN_CLASS_LSTM()
        else:
            model = ColorCNN_CLASS()
    else:
        if args.use_lstm:
            if args.stateful:
                model = ColorCNN_REGRESSION_LSTM_STATEFUL()
            else:
                model = ColorCNN_REGRESSION_LSTM()
        else:
            model = ColorCNN_REGRESSION()
    model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=LR, eps=1e-8)

    step = 0
    for i in range(EPOCHS):
        if TRAIN_MODE == 'classification':
            step = train_classification(model,
                                        loader,
                                        criterion,
                                        weights_a,
                                        weights_b,
                                        optimizer,
                                        writer,
                                        stepsTilNow=step,
                                        stop_after=args.steps_per_epoch,
                                        stateful=args.stateful)
            video_evaluation_classification(model,
                                            i,
                                            args.eval_savepath,
                                            args.testvideo_filename,
                                            use_lstm=args.use_lstm,
                                            imagenet=args.imagenet)
            if not args.imagenet:
                random.shuffle(train_datasets)
                ds = torch.utils.data.ConcatDataset(train_datasets)
                loader = torch.utils.data.DataLoader(ds,
                                                     batch_size=None,
                                                     num_workers=4,
                                                     pin_memory=True)
            if args.use_lstm:
                torch.save(
                    {
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    },
                    "/network-ceph/pgrundmann/video_models/lstm_classification_"
                    + str(i) + ".bin")
            else:
                torch.save(
                    {
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    },
                    "/network-ceph/pgrundmann/video_models/simple_cnn_classification_"
                    + str(i) + ".bin")
        else:
            step = train(model,
                         loader,
                         criterion,
                         optimizer,
                         writer,
                         stepsTilNow=step,
                         stop_after=args.steps_per_epoch,
                         stateful=args.stateful)
            video_evaluation_regression(model,
                                        i,
                                        args.eval_savepath,
                                        args.testvideo_filename,
                                        use_lstm=args.use_lstm,
                                        imagenet=args.imagenet)
            if not args.imagenet:
                random.shuffle(train_datasets)
                ds = torch.utils.data.ConcatDataset(train_datasets)
                loader = torch.utils.data.DataLoader(ds,
                                                     batch_size=None,
                                                     num_workers=4,
                                                     pin_memory=True)
            if args.use_lstm:
                torch.save(
                    {
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    },
                    "/network-ceph/pgrundmann/video_models/lstm_regression_" +
                    str(i) + ".bin")
            else:
                torch.save(
                    {
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    },
                    "/network-ceph/pgrundmann/video_models/simple_cnn_regression_"
                    + str(i) + ".bin")
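
The video path above relies on DataLoader(..., batch_size=None), which disables automatic batching: each dataset index must already return a full batch. A self-contained sketch of that pattern with a hypothetical stand-in for VideoDataset:

import torch
from torch.utils.data import ConcatDataset, DataLoader, Dataset

class PreBatchedDataset(Dataset):  # hypothetical stand-in for VideoDataset
    def __init__(self, frames, batch_size):
        self.frames, self.batch_size = frames, batch_size
    def __len__(self):
        return len(self.frames) // self.batch_size
    def __getitem__(self, i):
        # Return a whole (batch, C, H, W) tensor; with batch_size=None the
        # loader passes it through without collation.
        s = i * self.batch_size
        return torch.stack(self.frames[s:s + self.batch_size])

datasets = [PreBatchedDataset([torch.rand(3, 112, 112)] * 32, 8) for _ in range(2)]
loader = DataLoader(ConcatDataset(datasets), batch_size=None, num_workers=0)
assert next(iter(loader)).shape == (8, 3, 112, 112)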
Example #5
import argparse
import os
import random
import string

import h5py
import torch
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# Project-local names used below (assumed): VideoDataset, loadVideos and the
# ColorCNN_* models, plus the disabled block's ImageDataset, FastDataLoader,
# custom_loss and train/evaluate helpers.


def main():
    parser = argparse.ArgumentParser()
    # FOR DISTRIBUTED:  Parse for the local_rank argument, which will be supplied
    # automatically by torch.distributed.launch.
    parser.add_argument("--local_rank", default=0, type=int)
    args = parser.parse_args()

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    # Take the world size and GPU count from the environment/runtime (the
    # original hard-coded both to 1, which breaks the data sharding below).
    WORLD_SIZE = int(os.environ.get('WORLD_SIZE', 1))
    GPU_COUNT = max(torch.cuda.device_count(), 1)
    if args.distributed:
        torch.cuda.set_device(args.local_rank % GPU_COUNT)
        torch.distributed.init_process_group(backend='nccl',
                                             world_size=WORLD_SIZE,
                                             init_method='env://')


    torch.multiprocessing.set_start_method('spawn')
    end = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(4))
    config_name = "IMAGE-MODEL-CNN" + end
    if args.local_rank == 0:
        writer = SummaryWriter(log_dir="/network-ceph/pgrundmann/video_runs/" +config_name+"/")
    else:
        writer = None

    BATCH_SIZE = 256
    EPOCHS = 200
    LR = 0.0002
    TRAIN_MODE = 'regression'

    train_video_filenames, test_video_filenames = loadVideos(
        "/network-ceph/pgrundmann/youtube_processed_small")

    train_loaders = []
    train_datasets = []
    training_names = []
    with h5py.File('/network-ceph/pgrundmann/youtube_precalculated/final_dataset.hdf5', 'r') as f:
        for name in f:
            training_names.append(name)

    to_process = len(training_names) // WORLD_SIZE
    training_names = training_names[args.local_rank * to_process:
                                    (args.local_rank + 1) * to_process]

    random.shuffle(training_names)

    for fname in tqdm(training_names):
        train_datasets.append(VideoDataset(fname, 256))
    ds = torch.utils.data.ConcatDataset(train_datasets)
    loader = torch.utils.data.DataLoader(ds, batch_size=None, num_workers=1,
                                         pin_memory=True)
    print("Loaded dataloaders and datasets")

    split = ''
    # The remainder of main() was disabled in the original inside one long
    # triple-quoted block; a stray inner ''' (which re-terminated the string
    # and left the file unparseable) is removed below so the disabled code
    # reads as a single block.
    '''
    #stl10_dataset_train = torchvision.datasets.STL10("/network-ceph/pgrundmann/stl_10",split='train+unlabeled',download=True)
    #stl10_dataset_test = torchvision.datasets.STL10("/network-ceph/pgrundmann/stl_10",split='test',download=True)
    composed = transforms.Compose([transforms.Resize(128),
                               transforms.RandomCrop(112)])
    imagenet_train = torchvision.datasets.ImageFolder("/network-ceph/pgrundmann/maschinelles_sehen/ImageNet-Datasets-Downloader/imagenet/imagenet_images", transform=composed)
    train_length = round(len(imagenet_train) * 0.99)
    test_length = len(imagenet_train) - train_length
    train_set, val_set = torch.utils.data.random_split(imagenet_train, [train_length, test_length])
    dataset = ImageDataset(train_set,type='training', training=TRAIN_MODE)
    train_loader = FastDataLoader(dataset,batch_size=128, num_workers=12, pin_memory=True, shuffle=True)
    print("Loaded Train-Set")
    
    test_dataset = ImageDataset(val_set,type='test', training=TRAIN_MODE)
    test_loader = torch.utils.data.DataLoader(test_dataset,batch_size=32, num_workers=0, pin_memory=True, shuffle=False)
    print("Loaded Validation-Set")



    if TRAIN_MODE == 'regression':
        criterion = torch.nn.MSELoss()
    else:
        weights_a = torch.tensor(dataset.weights_a, dtype=torch.float).cuda()
        weights_b = torch.tensor(dataset.weights_b, dtype=torch.float).cuda()
        criterion_a = torch.nn.CrossEntropyLoss(weight=weights_a)
        criterion_b = torch.nn.CrossEntropyLoss(weight=weights_b)
        criterion = custom_loss
    
    if TRAIN_MODE == 'classification':
        model = ColorCNN_CLASS()
    else:
        model = ColorCNN_REGRESSION()
    model = model.cuda()
    model = torch.nn.DataParallel(model)



    optimizer = torch.optim.Adam(model.parameters(), lr=LR, eps=1e-8)
    step = 0
    for i in range(EPOCHS):
        #evaluate(model, i, test_loader, writer)
        if TRAIN_MODE == 'classification':
            evaluate_classification(model, i, test_loader, writer)
            step = train_classification(model, train_loader, criterion, weights_a, weights_b, optimizer, writer,stepsTilNow=step)
        else:
            evaluate(model, i, test_loader, writer)
            step = train(model, train_loader, criterion, optimizer, writer, stepsTilNow=step)

        print("Epoch finished")
        if args.local_rank == 0:
            torch.save(model.state_dict(), "/network-ceph/pgrundmann/image_model_mixed_weights_cnn_" + str(i) + ".bin")
        
        random.shuffle(training_names)
    
        for fname in tqdm(training_names):
            train_datasets.append(VideoDataset(fname, 64))
        ds = torch.utils.data.ConcatDataset(train_datasets)
        loader = torch.utils.data.DataLoader(ds, batch_size=None, num_workers=1, pin_memory=True)
        '''
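
For the distributed setup in this example, torch.distributed.launch exports WORLD_SIZE, RANK and LOCAL_RANK, and init_method='env://' reads them. A minimal sketch that derives everything from the environment rather than hard-coded constants:

import os
import torch

def init_distributed(local_rank):
    # WORLD_SIZE is exported by torch.distributed.launch; default to one
    # process when launched directly.
    world_size = int(os.environ.get('WORLD_SIZE', 1))
    if world_size > 1:
        torch.cuda.set_device(local_rank % torch.cuda.device_count())
        # env:// reads MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE from the env.
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    return world_size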