コード例 #1
0
def main():
    """Evaluate pre-trained depth-estimation weights on the validation set.

    Loads the best model weights from a hard-coded results directory,
    rebuilds the ResNet-50/upproj network, and runs ``validate`` with
    outputs directed to ``results/uw_test/uw_test4``.
    """
    global test_csv  # NOTE(review): declared global but never assigned here

    # evaluation mode: training run whose best weights are evaluated
    evaluate_filepath = '/root/workspace/depth/sparse-to-dense.pytorch/results/uw_nyu.sparsifier=uar.samples=0.modality=rgb.arch=resnet50.decoder=upproj.criterion=l1.lr=0.01.bs=16.pretrained=True(old)'
    best_weights_path = os.path.join(evaluate_filepath, 'best_model.pkl')
    # Raise explicitly instead of assert: asserts are stripped under `python -O`.
    if not os.path.isfile(best_weights_path):
        raise FileNotFoundError(
            "=> no best weights found at '{}'".format(evaluate_filepath))
    print("=> loading best weights for Model '{}'".format(evaluate_filepath))

    val_loader = create_data_loaders()

    decoder = 'upproj'

    model = ResNet(layers=50,
                   decoder=decoder,
                   output_size=val_loader.dataset.output_size,
                   pretrained=False)
    model = model.cuda()
    model.load_state_dict(torch.load(best_weights_path))

    print("=> loaded best weights for Model")

    # Ensure the output directory exists before validation writes into it
    # (the sibling evaluation scripts in this file do the same).
    output_directory = os.path.join('results/uw_test', 'uw_test4')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    validate(val_loader, model, output_directory=output_directory)
コード例 #2
0
ファイル: core.py プロジェクト: DimTrigkakis/MasterThesis
class viewpoint_classifier():
    """Viewpoint classifier bundling a CNN, its data loaders and optimizer.

    Depending on ``args.video``, either builds train/test loaders over a
    ``CustomDataset`` or a single frame loader over a ``VideoDataset``.
    """

    def __init__(self, model, dataset_index=0, video_target=None):
        """Build loaders, select the backbone, optionally load weights.

        Args:
            model: backbone name — "alex", "vgg" or "resnet".
            dataset_index: which dataset split index to use.
            video_target: optional video identifier forwarded to the datasets.
        """
        # `is None` instead of `== None` (PEP 8 singleton comparison).
        if args.video is None:
            self.video_target = video_target
            customset_train = CustomDataset(path=args.dataset_path,
                                            subset_type="training",
                                            dataset_index=dataset_index,
                                            video_target=video_target)
            customset_test = CustomDataset(path=args.dataset_path,
                                           subset_type="testing",
                                           dataset_index=dataset_index,
                                           video_target=video_target)

            self.trainloader = torch.utils.data.DataLoader(
                dataset=customset_train,
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.num_workers)
            self.testloader = torch.utils.data.DataLoader(
                dataset=customset_test,
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=args.num_workers)
        else:
            video_dataset = VideoDataset(
                video=args.video,
                batch_size=args.batch_size,
                frame_skip=int(args.frame_skip),
                image_folder=args.extract_frames_path,
                use_existing=args.use_existing_frames)

            self.videoloader = torch.utils.data.DataLoader(
                dataset=video_dataset,
                batch_size=1,
                shuffle=False,
                num_workers=args.num_workers)

        # Select the backbone architecture by name.
        if model == "alex":
            self.model = AlexNet()
        elif model == "vgg":
            self.model = VGG()
        elif model == "resnet":
            self.model = ResNet()

        if args.pretrained_model is not None:
            # NOTE(review): assumes pretrained_finetuning is a bool flag.
            if not args.pretrained_finetuning:
                self.model.load_state_dict(torch.load(args.pretrained_model))
            else:
                # Load ImageNet-shaped weights (1000-way fc head), then swap
                # the head for a 3-way viewpoint classifier.
                # TODO: load only the backbone part of the resnet model.
                print("DEBUG : Make it load only part of the resnet model")
                self.model.fc = nn.Linear(512, 1000)
                self.model.load_state_dict(torch.load(args.pretrained_model))
                self.model.fc = nn.Linear(512, 3)

        self.model.cuda()
        print("Using weight decay: ", args.weight_decay)
        self.optimizer = optim.SGD(self.model.parameters(),
                                   weight_decay=float(args.weight_decay),
                                   lr=0.01,
                                   momentum=0.9,
                                   nesterov=True)
        self.criterion = nn.CrossEntropyLoss().cuda()
コード例 #3
0
def main():
    """Evaluate SphereFCRN best weights and record validation metrics.

    Loads the best weights from a hard-coded results directory, runs
    validation with csv logging enabled, and writes the summary metrics
    to best.txt plus an optional comparison image.
    """
    global test_csv

    # evaluation mode: training run whose best weights are evaluated
    evaluate_filepath = '/root/workspace/depth/sparse-to-dense.pytorch/results/uw_nyu.sparsifier=uar.samples=0.modality=rgb.arch=resnet50.decoder=upproj.criterion=l1.lr=0.01.bs=16.pretrained=True(old)'
    best_weights_path = os.path.join(evaluate_filepath, 'best_model.pkl')
    # Raise explicitly instead of assert: asserts are stripped under `python -O`.
    if not os.path.isfile(best_weights_path):
        raise FileNotFoundError(
            "=> no best weights found at '{}'".format(evaluate_filepath))
    print(
        "=> loading best weights for SphereFCRN '{}'".format(evaluate_filepath))

    val_loader = create_data_loaders()

    decoder = 'upproj'

    model = ResNet(layers=50,
                   decoder=decoder,
                   output_size=val_loader.dataset.output_size,
                   pretrained=False)
    model = model.cuda()
    model.load_state_dict(torch.load(best_weights_path))

    print("=> loaded best weights for SphereFCRN")

    # create results folder, if not already exists
    output_directory = os.path.join('results', 'uw_test5')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # BUGFIX: write the csv header BEFORE validation. validate() with
    # write_to_file=True presumably appends rows to test_csv; the original
    # code opened test_csv with mode 'w' after validation, truncating those
    # rows and leaving only the header.
    with open(test_csv, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

    result, img_merge = validate(val_loader, model, write_to_file=True)

    # record the summary metrics for this evaluation run
    with open(best_txt, 'w') as txtfile:
        txtfile.write(
            "mse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
            .format(result.mse, result.rmse, result.absrel, result.lg10,
                    result.mae, result.delta1, result.gpu_time))
    if img_merge is not None:
        img_filename = output_directory + '/comparison_best.png'
        utils.save_image(img_merge, img_filename)
コード例 #4
0
def init_model(nfm=32,
               res_blocks=1,
               in_frames=2,
               batch_size=2,
               epoch_to_load=None):
    """Create (or reload) the U-Net generator, discriminator and optimizers.

    Args:
        nfm: base number of feature maps.
        res_blocks: number of residual blocks in the ResNet backbone.
        in_frames: number of conditioning input frames for the discriminator.
        batch_size: unused here; kept for interface compatibility.
        epoch_to_load: if not None, load pickled 'unet_epoch_{n}' and
            'D_epoch_{n}' checkpoints instead of building fresh networks.

    Returns:
        dict with 'Unet', 'Discriminator', 'Unet_optimizer' and
        'Discriminator_optimizer' entries.
    """
    if epoch_to_load is not None:  # `is not None`, not `!= None`
        # Resume from whole-model checkpoints; the saved U-Net already
        # contains its ResNet backbone, so building one here (as the
        # original code did) just wasted CPU/GPU memory.
        my_unet = torch.load('unet_epoch_{}'.format(epoch_to_load))
        discriminator = torch.load('D_epoch_{}'.format(epoch_to_load))
    else:
        resnet = ResNet(nfm * 2, res_blocks)
        if torch.cuda.is_available():
            resnet = resnet.cuda()
        my_unet = U_Net(nfm, resnet, 1, 1)
        discriminator = CNN((in_frames + 1) * 3, nfm, 512)

    if torch.cuda.is_available():
        my_unet, discriminator = my_unet.cuda(), discriminator.cuda()

    Unet_optim = torch.optim.Adam(my_unet.parameters(), lr=0.002)
    D_optim = torch.optim.Adam(discriminator.parameters(), lr=0.002)

    return {'Unet': my_unet,
            'Discriminator': discriminator,
            'Unet_optimizer': Unet_optim,
            'Discriminator_optimizer': D_optim}
コード例 #5
0
def main():
    """Evaluate pre-trained depth weights and record the best metrics.

    Loads the best weights from a hard-coded results directory, runs
    validation, and writes rmse/absrel/delta1 to best.txt in the output
    directory.
    """
    global test_csv  # NOTE(review): declared global but never assigned here

    # evaluation mode: training run whose best weights are evaluated
    evaluate_filepath = '/root/workspace/depth/sparse-to-dense.pytorch/results/uw_nyu.sparsifier=uar.samples=0.modality=rgb.arch=resnet50.decoder=upproj.criterion=l1.lr=0.01.bs=16.pretrained=True(old)'
    best_weights_path = os.path.join(evaluate_filepath, 'best_model.pkl')
    # Raise explicitly instead of assert: asserts are stripped under `python -O`.
    if not os.path.isfile(best_weights_path):
        raise FileNotFoundError(
            "=> no best weights found at '{}'".format(evaluate_filepath))
    print("=> loading best weights for model '{}'".format(evaluate_filepath))

    val_loader = create_data_loaders()

    decoder = 'upproj'

    model = ResNet(layers=50,
                   decoder=decoder,
                   output_size=val_loader.dataset.output_size,
                   pretrained=False)
    model = model.cuda()
    model.load_state_dict(torch.load(best_weights_path))

    print("=> loaded best weights for model")

    # create results folder, if not already exists
    output_directory = os.path.join('results/uw_test', 'uw_test5')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')

    result = validate(val_loader, model, output_directory=output_directory)

    # record summary metrics; validate() is expected to return a
    # (rmse, absrel, delta1) triple here — TODO confirm against its definition
    with open(best_txt, 'w') as txtfile:
        txtfile.write("rmse={:.3f}\nabsrel={:.3f}\ndelta1={:.3f}\n".format(
            result[0], result[1], result[2]))
コード例 #6
0
def main():
    """Train or evaluate a ResNet depth-estimation model on the NYU dataset.

    Configuration comes from the module-level ``args``; this function also
    (re)binds the module globals ``best_result``, ``output_directory``,
    ``train_csv`` and ``test_csv`` that ``train``/``validate`` rely on.
    Three modes: evaluate a saved best model, resume from a checkpoint, or
    train a new model from scratch.
    """
    global args, best_result, output_directory, train_csv, test_csv

    # Optional depth sparsifier that simulates sparse depth input.
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples,
                                     max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples,
                                     max_depth=max_depth)

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    # NOTE(review): an unrecognized args.criterion leaves `criterion` unbound
    # and the training loop fails later with NameError — consider validating.
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    out_channels = 1  # single-channel depth prediction

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')

    train_dataset = NYUDataset(traindir,
                               type='train',
                               modality=args.modality,
                               sparsifier=sparsifier)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)

    # set batch size to be 1 for validation
    val_dataset = NYUDataset(valdir,
                             type='val',
                             modality=args.modality,
                             sparsifier=sparsifier)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    print("=> data loaders created.")

    # evaluation mode: load the best checkpoint, validate once, and return
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']  # whole pickled model, not a state_dict
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint (restores model AND optimizer)
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # create new model
    else:
        # define model
        # NOTE(review): an arch other than resnet50/resnet18 leaves `model`
        # unbound and fails below with NameError.
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # create new csv files with only header
        # (only for fresh runs — resuming keeps the existing csv history)
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # checkpoint every epoch; the whole model/optimizer are pickled
        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
コード例 #7
0
def main():
    """Train, evaluate, cross-train, or resume a depth-estimation model.

    The mode is selected by ``args.evaluate`` / ``args.crossTrain`` /
    ``args.resume``; otherwise a fresh model is created. Module globals
    (``args``, ``best_result``, ``output_directory``, ``train_csv``,
    ``test_csv``) are rebound for use by ``train``/``validate``.
    """
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode: load the checkpoint named by args.evaluate,
    # validate once, and return without training
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
        "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        # NOTE(review): args is replaced wholesale by the checkpoint's args;
        # command-line overrides other than --evaluate are discarded.
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']  # whole pickled model, not a state_dict
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return
    elif args.crossTrain:
        # Retrain a previously trained model using the CURRENT args
        # (fresh optimizer, current lr/momentum/weight decay).
        print("Retraining loaded model on current input parameters")
        train_loader, val_loader = create_data_loaders(args)
        checkpoint = torch.load(args.crossTrain)
        model = checkpoint['model']
        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
            momentum=args.momentum, weight_decay=args.weight_decay)
        model = model.cuda()

    # optionally resume from a checkpoint (restores model AND optimizer state)
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True

    # create new model
    else:
        # NOTE(review): an arch other than resnet50/resnet18 leaves `model`
        # unbound and fails below with NameError.
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
            momentum=args.momentum, weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if not already exists
    # NOTE(review): this overwrites the output_directory set in the resume
    # branch above — confirm utils.get_output_directory(args) reproduces the
    # checkpoint's directory when resuming.
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    # (skipped when resuming so the existing csv history is preserved)
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model,
                                     epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # checkpoint every epoch; the whole model/optimizer are pickled
        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
コード例 #8
0
def train(working_dir, grid_size, learning_rate, batch_size, num_walks,
          model_type, fn):
    """Train a grid-based regression model on random-walk featurizations.

    Args:
        working_dir: directory containing property csv files and walk data.
        grid_size: spatial grid size passed to the model constructors.
        learning_rate: Adam learning rate.
        batch_size: minibatch size for the data loaders.
        num_walks: number of pre-generated random-walk files to sample from.
        model_type: one of resnet18/34/50 or densenet121/161/169/201.
        fn: basename used for the log file and pickled model snapshots.

    Returns:
        The trained model, or None if ``model_type`` is not recognized.
    """
    train_props, val_props, test_props = get_props(working_dir,
                                                   dtype=np.float32)
    means_stds = np.loadtxt(working_dir + "/means_stds.csv",
                            dtype=np.float32,
                            delimiter=',')

    # filter out redundant qm8 properties
    if train_props.shape[1] == 16:
        filtered_labels = list(range(0, 8)) + list(range(12, 16))
        train_props = train_props[:, filtered_labels]
        val_props = val_props[:, filtered_labels]
        test_props = test_props[:, filtered_labels]
        means_stds = means_stds[:, filtered_labels]

    model = _build_model(model_type, grid_size, train_props.shape[1])
    if model is None:
        print("specify a valid model")
        return
    model.float()
    model.cuda()

    # Per-item losses (reduction='none') so per-label statistics can be logged.
    loss_function_train = nn.MSELoss(reduction='none')
    loss_function_val = nn.L1Loss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    stds = means_stds[1, :]  # per-label std-devs used to un-normalize losses

    # BUGFIX: the log-file name was fn + "txt" (missing dot); the model
    # snapshot below uses fn + ".pkl", so ".txt" is clearly intended.
    with open(fn + ".txt", "w") as log_file:
        log_file.write("start")
        log_file.flush()

        for load_idx in range(num_loads):
            # Periodically snapshot the model so long runs can be inspected.
            if load_idx % 20 == 0:
                with open("../../scratch/" + fn + ".pkl", "wb") as model_file:
                    pickle.dump(model, model_file)

            log_file.write("load: " + str(load_idx))
            print("load: " + str(load_idx))
            # Get new random walks; the first load also builds the fixed
            # val/test loaders, later loads pick a random train walk file.
            if load_idx == 0:
                t = time.time()
                train_loader, val_loader, test_loader = get_loaders(
                    working_dir,
                    load_idx,
                    grid_size,
                    batch_size,
                    train_props,
                    val_props=val_props,
                    test_props=test_props)
                print("load time")
                print(time.time() - t)
            else:
                file_num = random.randint(0, num_walks - 1)
                t = time.time()
                train_loader, _, _ = get_loaders(working_dir,
                                                 file_num,
                                                 grid_size,
                                                 batch_size,
                                                 train_props)
                print("load time")
                print(time.time() - t)
            # Train on this set of random walks, possibly multiple epochs.
            for epoch in range(epochs_per_load):
                _run_epoch(model, train_loader, val_loader, optimizer,
                           loss_function_train, loss_function_val, stds,
                           log_file)
            log_file.flush()
    return model


def _build_model(model_type, grid_size, num_classes):
    """Construct the regression network named by ``model_type``.

    Returns None when the name is not recognized.
    """
    if model_type == "resnet18":
        return ResNet(BasicBlock, [2, 2, 2, 2], grid_size, "regression",
                      feat_nums, e_sizes, num_classes=num_classes)
    if model_type == "resnet34":
        return ResNet(BasicBlock, [3, 4, 6, 3], grid_size, "regression",
                      feat_nums, e_sizes, num_classes=num_classes)
    if model_type == "resnet50":
        return ResNet(Bottleneck, [3, 4, 6, 3], grid_size, "regression",
                      feat_nums, e_sizes, num_classes=num_classes)
    if model_type == "densenet121":
        return densenet121(grid_size, "regression", feat_nums, e_sizes,
                           num_classes=num_classes)
    if model_type == "densenet161":
        return densenet161(grid_size, "regression", feat_nums, e_sizes,
                           num_classes=num_classes)
    if model_type == "densenet169":
        return densenet169(grid_size, "regression", feat_nums, e_sizes,
                           num_classes=num_classes)
    if model_type == "densenet201":
        return densenet201(grid_size, "regression", feat_nums, e_sizes,
                           num_classes=num_classes)
    return None


def _report(log_file, label, values):
    """Write a label/value pair to the log file and echo both to stdout."""
    log_file.write(label + "\n")
    log_file.write(values + "\n")
    print(label)
    print(values)


def _run_epoch(model, train_loader, val_loader, optimizer,
               loss_function_train, loss_function_val, stds, log_file):
    """Run one training pass plus validation, logging per-label losses."""
    model.train()
    t = time.time()
    train_loss_list = []
    train_mae_loss_list = []
    for walks_int, walks_float, props in train_loader:
        walks_int = walks_int.cuda().long()
        walks_float = walks_float.cuda().float()
        props = props.cuda()
        outputs = model(walks_int, walks_float)
        # Individual losses for each label (MAE logged, MSE trained on)
        loss_mae = torch.mean(loss_function_val(props, outputs), 0)
        train_mae_loss_list.append(loss_mae.cpu().detach().numpy())
        loss = torch.mean(loss_function_train(props, outputs), 0)
        train_loss_list.append(loss.cpu().detach().numpy())
        # Loss converted to single value for backpropagation
        loss = torch.sum(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    model.eval()
    val_loss_list = []
    with torch.no_grad():
        for walks_int, walks_float, props in val_loader:
            walks_int = walks_int.cuda().long()
            walks_float = walks_float.cuda().float()
            props = props.cuda()
            outputs = model(walks_int, walks_float)
            # Individual losses for each item
            val_loss_list.append(
                loss_function_val(props, outputs).cpu().detach().numpy())
    # ith row of these arrays is the losses for each label in batch i
    train_loss_arr = np.array(train_loss_list)
    train_mae_arr = np.array(train_mae_loss_list)
    _report(log_file, "training mse loss", str(np.mean(train_loss_arr)))
    _report(log_file, "training mae loss", str(np.mean(train_mae_arr)))
    val_loss_arr = np.concatenate(val_loss_list, 0)
    val_loss = np.mean(val_loss_arr, 0)  # per-label validation MAE
    _report(log_file, "val loss", str(np.mean(val_loss_arr)))
    # Normalized losses are in units of the standardized targets.
    tnl = np.mean(train_mae_arr, 0)
    _report(log_file, "train normalized losses", " ".join(map(str, tnl)))
    _report(log_file, "val normalized losses", " ".join(map(str, val_loss)))
    # Unnormalized (scaled by target std-dev) for comparison to papers.
    tunl = stds * tnl
    _report(log_file, "train unnormalized losses", " ".join(map(str, tunl)))
    vunl = stds * val_loss
    _report(log_file, "val unnormalized losses", " ".join(map(str, vunl)))
    log_file.write("\n")
    print("\n")
    print("time")
    print(time.time() - t)
コード例 #9
0
def main():
    """Train or evaluate a sparse-to-dense depth-prediction ResNet.

    Mode is selected by the parsed CLI arguments:
      * ``--evaluate``  load ``model_best.pth.tar`` from the run directory
        and run a single validation pass;
      * ``--resume``    restore model/optimizer state from a checkpoint and
        continue training;
      * otherwise       build a fresh model and train from scratch.

    Side effects: creates the run's output directory, (re)writes the
    ``train.csv``/``test.csv`` headers for new runs, updates ``best.txt``
    and the comparison image whenever validation RMSE improves, and saves
    a checkpoint every epoch.
    """
    global args, best_result, output_directory, train_csv, test_csv
    args = parser.parse_args()

    # Plain RGB input has no sparse depth channel, so sparsifier-related
    # settings are forced to neutral values.
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        args.num_samples = 0
    if args.modality == 'rgb' and args.max_depth != 0.0:
        # Fix: message previously said "rgb/rgbd" although the condition
        # only matches plain 'rgb'.
        print("max depth is forced to be 0.0 when input modality is rgb")
        args.max_depth = 0.0

    # The sparsifier generates the random sparse depth input from the
    # ground-truth depth; a negative max_depth means "no limit".
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples,
                                     max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples,
                                     max_depth=max_depth)

    # create results folder, if not already exists
    output_directory = os.path.join(
        '/media/kuowei/8EB89C8DB89C7585/results_CS',
        '{}'.format(args.outputdir),
        '{}.sparsifier={}.modality={}.arch={}.decoder={}.criterion={}.lr={}.bs={}'
        .format(args.data, sparsifier, args.modality, args.arch, args.decoder,
                args.criterion, args.lr, args.batch_size))
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion)
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    else:
        # Fix: the original left `criterion` undefined for any other value
        # and crashed later with a NameError.
        raise ValueError("unsupported criterion '{}'".format(args.criterion))
    out_channels = 1  # single-channel depth prediction

    # Data loading code
    print("=> creating data loaders ...")

    traindir = os.path.join('/media/kuowei/8EB89C8DB89C7585/data', args.data,
                            'train')
    valdir = os.path.join('/media/kuowei/8EB89C8DB89C7585/data', args.data,
                          'val')

    train_dataset = NYUDataset(traindir,
                               type='train',
                               modality=args.modality,
                               sparsifier=sparsifier)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)

    # batch size is fixed to 1 for validation
    val_dataset = NYUDataset(valdir,
                             type='val',
                             modality=args.modality,
                             sparsifier=sparsifier)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        if not os.path.isfile(best_model_filename):
            # Fix: the original only printed here and then fell through to
            # validate() with undefined `model`/`checkpoint` (NameError).
            print("=> no best model found at '{}'".format(best_model_filename))
            return
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            optimizer = checkpoint['optimizer']
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return

    # create new model
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)  # one input channel per modality letter
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet152':
            model = ResNet(layers=152,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        else:
            # Fix: the original left `model` undefined for any other arch.
            raise ValueError("unsupported architecture '{}'".format(args.arch))
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch)
コード例 #10
0
    datareader.get_data_info()
    X, Y, person_cam_index = datareader.read_images('train',
                                                    need_augmentation=True)
    X_n = X.shape[0]

if args.train:
    ''' =========================== Load model ============================= '''
    print('Loading Model.')
    model = ResNet(depth=50,
                   pretrained=True,
                   cut_at_pooling=False,
                   num_features=num_features,
                   norm=False,
                   dropout=0.5,
                   num_classes=datareader.num_class)
    model.cuda()
    ''' ===================== Fine-tune model by ID classification ============ '''
    n_iters = args.finetune_n_iters
    lr = args.finetune_lr
    batch_size = args.finetune_batch_size
    show_iters = args.show_iters
    # Criterion
    criterion = nn.CrossEntropyLoss().cuda()
    # Optimizer
    if hasattr(model, 'base'):
        base_param_ids = set(map(id, model.base.parameters()))
        new_params = [
            p for p in model.parameters() if id(p) not in base_param_ids
        ]
        param_groups = [{
            'params': model.base.parameters(),
コード例 #11
0
ファイル: main.py プロジェクト: LeonSun0101/CD-SD
def main():
    """Train or evaluate a sparse-to-dense depth model on NYU-Depth-v2 or KITTI.

    Mode is chosen from the already-parsed global ``args``: ``--evaluate``
    runs one validation pass with the stored best model, ``--resume``
    continues training from a checkpoint, otherwise a new model is trained
    from scratch.  CSV logs, ``best.txt`` and per-epoch checkpoints are
    written to a results directory derived from the hyper-parameters.
    """
    global args, best_result, output_directory, train_csv, test_csv

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    # per-run log file paths
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        # masked mean-squared-error loss
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # sparsifier is a class for generating random sparse depth input from the ground truth
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples,
                                     max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples,
                                     max_depth=max_depth)

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')

    if args.data == 'nyudepthv2':
        # import lazily so the unused dataset module is never loaded
        from dataloaders.nyu_dataloader import NYUDataset
        train_dataset = NYUDataset(traindir,
                                   type='train',
                                   modality=args.modality,
                                   sparsifier=sparsifier)
        val_dataset = NYUDataset(valdir,
                                 type='val',
                                 modality=args.modality,
                                 sparsifier=sparsifier)

    elif args.data == 'kitti':
        from dataloaders.kitti_dataloader import KITTIDataset
        train_dataset = KITTIDataset(traindir,
                                     type='train',
                                     modality=args.modality,
                                     sparsifier=sparsifier)
        val_dataset = KITTIDataset(valdir,
                                   type='val',
                                   modality=args.modality,
                                   sparsifier=sparsifier)

    else:
        raise RuntimeError(
            'Dataset not found.' +
            'The dataset must be either of nyudepthv2 or kitti.')

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        sampler=None,
        worker_init_fn=lambda work_id: np.random.seed(work_id))
    # worker_init_fn ensures different sampling patterns for each data loading thread

    # set batch size to be 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode: validate once with the stored best model, then exit
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # create a new model and train it from scratch
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(
            args.modality)  # one input channel per modality letter, e.g. 4 for 'rgbd'
        # only two backbones are supported here: resnet50 and resnet18
        if args.arch == 'resnet50':  # instantiate the encoder-decoder ResNet
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(), args.lr, \
            momentum=args.momentum, weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
    model = model.cuda()
    # print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer,
              epoch)  # train for one epoch
        result, img_merge = validate(
            val_loader, model,
            epoch)  # evaluate on validation set after every epoch

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
コード例 #12
0
def main():
    """Train or evaluate an RGB(-D) sparse-to-dense depth model on NYU.

    Mode is chosen from the parsed CLI arguments: ``--evaluate`` runs a
    single validation pass with the stored best model, ``--resume``
    continues training from a checkpoint, otherwise a new model is trained
    from scratch.  CSV logs, ``best.txt`` and per-epoch checkpoints are
    written to a results directory named after the hyper-parameters.
    """
    global args, best_result, output_directory, train_csv, test_csv
    args = parser.parse_args()
    args.data = os.path.join('data', args.data)

    # Plain RGB input carries no sparse depth samples, so force the count to 0.
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        args.num_samples = 0

    # create results folder, if not already exists
    output_directory = os.path.join(
        'results',
        'NYUDataset.modality={}.nsample={}.arch={}.decoder={}.criterion={}.lr={}.bs={}'
        .format(args.modality, args.num_samples, args.arch, args.decoder,
                args.criterion, args.lr, args.batch_size))
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion); both losses predict one depth channel
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
        out_channels = 1
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
        out_channels = 1
    else:
        # Fix: the original left `criterion`/`out_channels` undefined for
        # any other value and crashed later with a NameError.
        raise ValueError("unsupported criterion '{}'".format(args.criterion))

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    train_dataset = NYUDataset(traindir,
                               type='train',
                               modality=args.modality,
                               num_samples=args.num_samples)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)

    # batch size is fixed to 1 for validation
    val_dataset = NYUDataset(valdir,
                             type='val',
                             modality=args.modality,
                             num_samples=args.num_samples)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        if not os.path.isfile(best_model_filename):
            # Fix: the original only printed here and then called validate()
            # with undefined `model`/`checkpoint`, raising NameError.
            print("=> no best model found at '{}'".format(best_model_filename))
            return
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        if not os.path.isfile(args.resume):
            # Fix: the original printed this and fell through to
            # model.cuda() with an undefined `model` (NameError).
            print("=> no checkpoint found at '{}'".format(args.resume))
            return
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # create new model
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)  # one input channel per modality letter
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        else:
            # Fix: the original left `model` undefined for any other arch.
            raise ValueError("unsupported architecture '{}'".format(args.arch))
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch)
コード例 #13
0
def main() -> int:
    best_result = Result()
    best_result.set_to_worst()
    args: Any
    args = parser.parse_args()
    dataset = args.data
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        args.num_samples = 0
    image_shape = (192, 256)  # if "my" in args.arch else (228, 304)

    # create results folder, if not already exists
    if args.transfer_from:
        output_directory = f"{args.transfer_from}_transfer"
    else:
        output_directory = utils.get_output_dir(args)
    args.data = os.path.join(os.environ["DATASET_DIR"], args.data)
    print("output directory :", output_directory)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    elif not args.evaluate:
        raise Exception("output directory allready exists")

    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    out_channels = 1
    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join(args.data, 'train')
    valdir = traindir if dataset == "SUNRGBD" else os.path.join(
        args.data, 'val')
    DatasetType = choose_dataset_type(dataset)
    train_dataset = DatasetType(traindir,
                                phase='train',
                                modality=args.modality,
                                num_samples=args.num_samples,
                                square_width=args.square_width,
                                output_shape=image_shape,
                                depth_type=args.depth_type)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)

    print("=> training examples:", len(train_dataset))

    val_dataset = DatasetType(valdir,
                              phase='val',
                              modality=args.modality,
                              num_samples=args.num_samples,
                              square_width=args.square_width,
                              output_shape=image_shape,
                              depth_type=args.depth_type)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    print("=> validation examples:", len(val_dataset))

    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        if os.path.isfile(best_model_filename):
            print("=> loading best model '{}'".format(best_model_filename))
            checkpoint = torch.load(best_model_filename)
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_filename))
        avg_result, avg_result_inside, avg_result_outside, _, results, evaluator = validate(
            val_loader,
            args.square_width,
            args.modality,
            output_directory,
            args.print_freq,
            test_csv,
            model,
            checkpoint['epoch'],
            write_to_file=False)
        write_results(best_txt, avg_result, avg_result_inside,
                      avg_result_outside, checkpoint['epoch'])
        for loss_name, losses in [
            ("rmses", (res.result.rmse for res in results)),
            ("delta1s", (res.result.delta1 for res in results)),
            ("delta2s", (res.result.delta2 for res in results)),
            ("delta3s", (res.result.delta3 for res in results)),
            ("maes", (res.result.mae for res in results)),
            ("absrels", (res.result.absrel for res in results)),
            ("rmses_inside", (res.result_inside.rmse for res in results)),
            ("delta1s_inside", (res.result_inside.delta1 for res in results)),
            ("delta2s_inside", (res.result_inside.delta2 for res in results)),
            ("delta3s_inside", (res.result_inside.delta3 for res in results)),
            ("maes_inside", (res.result_inside.mae for res in results)),
            ("absrels_inside", (res.result_inside.absrel for res in results)),
            ("rmses_outside", (res.result_outside.rmse for res in results)),
            ("delta1s_outside", (res.result_outside.delta1
                                 for res in results)),
            ("delta2s_outside", (res.result_outside.delta2
                                 for res in results)),
            ("delta3s_outside", (res.result_outside.delta3
                                 for res in results)),
            ("maes_outside", (res.result_outside.mae for res in results)),
            ("absrels_outside", (res.result_outside.absrel
                                 for res in results)),
        ]:
            with open(
                    os.path.join(output_directory,
                                 f"validation_{loss_name}.csv"),
                    "w") as csv_file:
                wr = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
                wr.writerow(losses)

        evaluator.save_plot(os.path.join(output_directory, "best.png"))
        return 0

    # optionally resume from a checkpoint
    elif args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            optimizer = checkpoint['optimizer']
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return 1
    # create new model
    else:
        if args.transfer_from:
            if os.path.isfile(args.transfer_from):
                print(f"=> loading checkpoint '{args.transfer_from}'")
                checkpoint = torch.load(args.transfer_from)
                args.start_epoch = 0
                model = checkpoint['model']
                print("=> loaded checkpoint")
                train_params = list(model.conv3.parameters()) + list(
                    model.decoder.layer4.parameters(
                    )) if args.train_top_only else model.parameters()
            else:
                print(f"=> no checkpoint found at '{args.transfer_from}'")
                return 1
        else:
            # define model
            print("=> creating Model ({}-{}) ...".format(
                args.arch, args.decoder))
            in_channels = len(args.modality)
            if args.arch == 'resnet50':
                n_layers = 50
            elif args.arch == 'resnet18':
                n_layers = 18
            model = ResNet(layers=n_layers,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained,
                           image_shape=image_shape,
                           skip_type=args.skip_type)
            print("=> model created.")
            train_params = model.parameters()

        adjusting_learning_rate = False
        if args.optimizer == "sgd":
            optimizer = torch.optim.SGD(train_params,
                                        args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
            adjusting_learning_rate = True
        elif args.optimizer == "adam":
            optimizer = torch.optim.Adam(train_params,
                                         weight_decay=args.weight_decay)
        else:
            raise Exception("We should never be here")

        if adjusting_learning_rate:
            print("=> Learning rate adjustment enabled.")
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, patience=args.adjust_lr_ep, verbose=True)
        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")
    epochs_since_best = 0
    train_results = []
    val_results = []
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        res_train, res_train_inside, res_train_outside = train(
            train_loader, model, criterion, optimizer, epoch, args.print_freq,
            train_csv)
        train_results.append((res_train, res_train_inside, res_train_outside))
        # evaluate on validation set
        res_val, res_val_inside, res_val_outside, img_merge, _, _ = validate(
            val_loader, args.square_width, args.modality, output_directory,
            args.print_freq, test_csv, model, epoch, True)
        val_results.append((res_val, res_val_inside, res_val_outside))
        # remember best rmse and save checkpoint
        is_best = res_val.rmse < best_result.rmse
        if is_best:
            epochs_since_best = 0
            best_result = res_val
            write_results(best_txt, res_val, res_val_inside, res_val_outside,
                          epoch)
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)
        else:
            epochs_since_best += 1

        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)

        plot_progress(train_results, val_results, epoch, output_directory)

        if epochs_since_best > args.early_stop_epochs:
            print("early stopping")
        if adjusting_learning_rate:
            scheduler.step(res_val.rmse)
    return 0
def train(working_dir, grid_size, learning_rate, batch_size, num_cores):
    """Train the random-walk ResNet classifier and return the trained model.

    Loads ``num_loads`` pre-generated random-walk files, trains
    ``epochs_per_load`` epochs on each, evaluates mean ROC-AUC on the whole
    validation set after every epoch, and finally writes a learning-curve
    plot via ``save_plot``.

    Args:
        working_dir: Directory containing ``{train,val,test}_smiles.csv``
            and the property files read by ``get_feat_dict``/``get_props``.
        grid_size: Spatial grid size forwarded to ``get_loaders``/``ResNet``.
        learning_rate: Adam learning rate.
        batch_size: Mini-batch size for the training loader.
        num_cores: Worker-process count for data loading.

    Returns:
        The trained ``ResNet`` model (left on the CPU).

    Relies on module-level names: ``feat_nums``, ``e_sizes``, ``num_loads``,
    ``epochs_per_load``, ``masked_cross_entropy``, ``get_feat_dict``,
    ``get_props``, ``get_loaders``, ``roc_auc_score``, ``save_plot``.
    """
    # One handle is enough for the whole run (same PID throughout); the
    # original re-created it before every printout. RSS printouts are in GiB
    # and bracket the big allocations to track host-memory growth.
    process = psutil.Process(os.getpid())
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    train_feat_dict = get_feat_dict(working_dir + "/train_smiles.csv")
    val_feat_dict = get_feat_dict(working_dir + "/val_smiles.csv")
    test_feat_dict = get_feat_dict(working_dir + "/test_smiles.csv")
    # The feature dicts are about 0.08 GB.
    print("pre model")
    print(process.memory_info().rss / 1024 / 1024 / 1024)

    torch.set_default_dtype(torch.float64)
    train_props, val_props, test_props = get_props(working_dir, dtype=int)
    print("pre model post props")
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    model = ResNet(BasicBlock, [2, 2, 2, 2],
                   grid_size,
                   "classification",
                   feat_nums,
                   e_sizes,
                   num_classes=train_props.shape[1])
    # NOTE(review): the default dtype is set to float64 above, but the model
    # is cast to float32 here and fed float32 inputs below -- confirm this
    # mix is intentional.
    model.float()
    model.cuda()
    print("model params")
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(pytorch_total_params)
    # The model lives on the CPU between uses so GPU memory is only held
    # while actually training/evaluating.
    model.cpu()
    print("model")
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    loss_function = masked_cross_entropy
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    tl_list = []    # per-epoch mean training loss
    vl_list = []    # per-epoch validation loss
    tmra_list = []  # per-epoch mean train ROC-AUC
    vmra_list = []  # per-epoch mean val ROC-AUC

    for file_num in range(num_loads):
        # Each file_num selects a fresh set of pre-generated random walks.
        # Val/test loaders are only needed once, on the first load.
        if file_num == 0:
            print("before get_loaders")
            print(process.memory_info().rss / 1024 / 1024 / 1024)
            train_loader, val_loader, test_loader = get_loaders(
                num_cores, working_dir, file_num, grid_size, batch_size,
                train_props, train_feat_dict,
                val_props=val_props, val_feat_dict=val_feat_dict,
                test_props=test_props, test_feat_dict=test_feat_dict)
        else:
            print("before get_loaders 2")
            print(process.memory_info().rss / 1024 / 1024 / 1024)
            train_loader, _, _ = get_loaders(
                num_cores, working_dir, file_num, grid_size, batch_size,
                train_props, train_feat_dict)
        # Train on a single set of random walks; can do multiple epochs if
        # desired.
        for epoch in range(epochs_per_load):
            model.train()
            model.cuda()
            t = time.time()
            train_loss_list = []
            props_list = []
            outputs_list = []
            for walks_int, walks_float, props in train_loader:
                walks_int = walks_int.cuda().long()
                walks_float = walks_float.cuda().float()
                props = props.cuda().long()
                props_list.append(props)
                outputs = model(walks_int, walks_float)
                outputs_list.append(outputs)
                loss = loss_function(props, outputs)
                train_loss_list.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            props = torch.cat(props_list, 0).cpu().numpy()
            outputs = torch.cat(outputs_list, 0).detach().cpu().numpy()
            # Per-task train ROC-AUC; label value 2 marks a missing label
            # and is masked out of the score.
            train_rocaucs = []
            for task in range(props.shape[1]):
                mask = props[:, task] != 2
                train_rocaucs.append(
                    roc_auc_score(props[mask, task], outputs[mask, task]))
            model.eval()
            with torch.no_grad():
                # Validation is evaluated in one shot over the full tensors
                # held by the dataset -- assumes the val set fits on the GPU.
                ds = val_loader.dataset
                walks_int = ds.int_feat_tensor.cuda().long()
                walks_float = ds.float_feat_tensor.cuda().float()
                props = ds.prop_tensor.cuda()
                outputs = model(walks_int, walks_float)
                loss = loss_function(props, outputs)
                props = props.cpu().numpy()
                outputs = outputs.cpu().numpy()
                val_rocaucs = []
                for task in range(props.shape[1]):
                    mask = props[:, task] != 2
                    val_rocaucs.append(
                        roc_auc_score(props[mask, task], outputs[mask,
                                                                 task]))
            print("load: " + str(file_num) + ", epochs: " + str(epoch))
            print("training loss")
            # Slightly approximate since the last batch can be smaller.
            tl = statistics.mean(train_loss_list)
            print(tl)
            print("val loss")
            vl = loss.item()
            print(vl)
            print("train mean roc auc")
            tmra = sum(train_rocaucs) / len(train_rocaucs)
            print(tmra)
            print("val mean roc auc")
            vmra = sum(val_rocaucs) / len(val_rocaucs)
            print(vmra)
            print("time")
            print(time.time() - t)
            tl_list.append(tl)
            vl_list.append(vl)
            tmra_list.append(tmra)
            vmra_list.append(vmra)
            model.cpu()
        # BUG FIX: removed the original's `file_num += 1` here -- it was dead
        # code (the for-loop header reassigns file_num every iteration) and
        # misleadingly suggested a double step through the load files.
        del train_loader  # release the loader (and its workers) before the next load
    save_plot(tl_list, vl_list, tmra_list, vmra_list)
    return model