def download_dataset(target_dataset, comet):
    """Download a dataset archive from attached cloud storage and extract it.

    Parameters
    ----------
    target_dataset : str
        File name at the base of the attached cloud storage
        (e.g. GCS: /data).
    comet : object
        Experiment tracker; must provide ``log_dataset_info`` and
        ``log_dataset_hash`` (assumed comet_ml.Experiment-like — TODO confirm).
    """
    # First configured Polyaxon data path is used as the storage root.
    data_paths = list(get_data_paths().values())[0]
    data_store = StoreManager(path=data_paths)

    logging.info('STARTING tar download')
    comet.log_dataset_info(name=target_dataset, version=None, path=data_paths)

    start = time.time()
    data_store.download_file(target_dataset)
    # Lazy %-formatting: the message is only built if the record is emitted.
    logging.info('DOWNLOAD time taken: %s', time.time() - start)

    comet.log_dataset_hash(target_dataset)

    if target_dataset.endswith('.tar.gz'):
        logging.info('STARTING untarring')
        # Bug fix: the original never closed the archive handle; the context
        # manager guarantees it is released even if extraction fails.
        # NOTE(review): extractall() on an untrusted archive is a
        # path-traversal risk — confirm the archive source is trusted.
        with tarfile.open(target_dataset) as tf:
            tf.extractall()
        logging.info('COMPLETING untarring')
def get_data_loaders(batch_size):
    """Build MNIST train/test DataLoaders rooted at the Polyaxon data path.

    Parameters
    ----------
    batch_size : int
        Batch size used for both the train and the test loader.

    Returns
    -------
    tuple
        ``(train_loader, train_dataset, test_loader, test_dataset)``.
    """
    # Polyaxon: use the first configured data path as the dataset root.
    data_path = list(get_data_paths().values())[0]
    data_dir = os.path.join(data_path, 'pytorch', 'mnist')

    train_dataset = datasets.MNIST(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transforms.ToTensor())
    # NOTE(review): train_data/train_labels (and test_data/test_labels below)
    # are deprecated torchvision aliases of .data/.targets — confirm the
    # installed torchvision version still exposes them.
    x_train_mnist = train_dataset.train_data.type(torch.FloatTensor)
    y_train_mnist = train_dataset.train_labels

    test_dataset = datasets.MNIST(root=data_dir,
                                  train=False,
                                  download=True,
                                  transform=transforms.ToTensor())
    x_test_mnist = test_dataset.test_data.type(torch.FloatTensor)
    y_test_mnist = test_dataset.test_labels

    # Bug fix: logging uses %-style lazy formatting, so the original calls
    # passed the sizes as stray positional args with no placeholders and the
    # sizes were never rendered (logging raised an internal format error).
    logging.info('Training Data Size: %s - %s',
                 x_train_mnist.size(), y_train_mnist.size())
    logging.info('Testing Data Size: %s - %s',
                 x_test_mnist.size(), y_test_mnist.size())

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)
    return train_loader, train_dataset, test_loader, test_dataset
def main():
    """Train and evaluate a BBC topic classifier (GloVe embeddings + Keras).

    Reads hyper-parameters from the CLI, trains on 75% of the data, saves the
    model and preprocessing pipeline, then reports loss/accuracy — to the
    Polyaxon experiment when running in-cluster, to stdout otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_epochs", default=40, type=int)
    parser.add_argument("--top_words", default=35000, type=int)
    parser.add_argument("--max_sequence_length", default=500, type=int)
    parser.add_argument("--batch_size", default=256, type=int)
    parser.add_argument("--polyaxon_env", default=0, type=int)
    args = parser.parse_args()

    # In-cluster runs track the experiment and read from the mounted storage.
    if args.polyaxon_env:
        experiment = Experiment()
        data_path = get_data_paths()["data-local"]
    else:
        data_path = "/data"

    np.random.seed(7)

    bbc_data_dir = data_path + "/bbc-topic-classification/bbc_data/"
    glove_embedding_dir = (data_path +
                           "/bbc-topic-classification/glove.6B.300d.txt")

    data = load_dataset(bbc_data_dir)
    glove_embeddings = load_glove_embeddings(glove_embedding_dir)

    preprocessing_pipeline = create_preprocessing_pipeline(
        args.top_words, args.max_sequence_length)

    train, test = train_test_split(data, test_size=0.25)
    X_train = preprocessing_pipeline.fit_transform(train.text)
    y_train = train["class"].values

    embedding_matrix = create_embedding_matrix(glove_embeddings,
                                               preprocessing_pipeline)
    model = create_model(embedding_matrix)
    model.fit(X_train,
              y_train,
              epochs=args.num_epochs,
              batch_size=args.batch_size,
              shuffle=True)

    # Persist the model and the fitted preprocessing artefacts.
    model.save("model.h5")
    joblib.dump(preprocessing_pipeline, "preprocessing_pipeline.pkl")

    X_test = preprocessing_pipeline.transform(test.text)
    y_test = test["class"].values
    metrics = model.evaluate(X_test, y_test)

    if args.polyaxon_env:
        experiment.outputs_store.upload_file("model.h5")
        experiment.outputs_store.upload_file("preprocessing_pipeline.pkl")
        experiment.log_metrics(loss=metrics[0], accuracy=metrics[1])
    else:
        print("loss: {}, accuracy: {}".format(metrics[0], metrics[1]))
def get_data_path(alternative):
    """Return the cluster-mounted 'data' path when available.

    Falls back to *alternative* when not running in a cluster, or when the
    cluster exposes no 'data' entry.
    """
    if not is_in_cluster():
        return alternative
    data_paths = get_data_paths()
    if data_paths is not None and 'data' in data_paths:
        return data_paths['data']
    return alternative
def update_domain_adapt_datapath(args):
    """Populate every dataset/model path attribute on *args* for a
    domain-adaptation run, then return the mutated *args*.

    Data and model roots come from the Polyaxon data pool when ``args.atp``
    is set, otherwise from ``args.dataDir`` / ``args.modelDir``.
    """
    join = os.path.join

    # Resolve the data root: Polyaxon data pool vs. local 'data' folder.
    if args.atp:
        args.dataDir = get_data_paths()['data-pool'] + '/DAST'
    else:
        args.dataDir = join(args.dataDir, 'data')
    print(args.dataDir)

    # Target-domain splits plus the vocabulary used for classifier evaluation.
    target_root = join(args.dataDir, args.dataset)
    args.target_train_path = join(target_root, 'train')
    args.target_valid_path = join(target_root, 'valid')
    args.target_test_path = join(target_root, 'test')
    args.target_vocab = join(target_root, 'vocab')

    # Source-domain splits plus the vocabulary used for classifier evaluation.
    source_root = join(args.dataDir, args.source_dataset)
    args.source_train_path = join(source_root, 'train')
    args.source_valid_path = join(source_root, 'valid')
    args.source_test_path = join(source_root, 'test')
    args.source_vocab = join(source_root, 'vocab')

    # The joint vocabulary lives directly under the common data root.
    args.multi_vocab = join(
        args.dataDir,
        '_'.join([args.source_dataset, args.dataset, 'multi_vocab']))

    # Sample-saving mode only needs a single epoch.
    if args.save_samples:
        args.max_epochs = 1

    # Resolve the model root, then the per-classifier output locations.
    if args.atp:
        args.modelDir = get_data_paths()['data-pool'] + '/DAST'
        print(args.modelDir)
    args.modelDir = join(args.modelDir, 'save_model')
    args.target_classifier_path = join(args.modelDir, 'classifier',
                                       args.dataset)
    args.source_classifier_path = join(args.modelDir, 'classifier',
                                       args.source_dataset)
    args.domain_classifier_path = join(
        args.modelDir, 'classifier',
        '_'.join([args.source_dataset, args.dataset, 'domain_adapt']))
    args.styler_path = join(args.modelDir, 'domain_adapt_styler')
    return args
def __init__(self, param):
    """Configure a lung-segmentation (JSRT) training run.

    Parameters
    ----------
    param : sequence
        ``(loss_function, network, routing_type)`` — *network* is a callable
        whose ``__name__`` selects which network source file is archived.
    """
    super().__init__()
    # Polyaxon: the first configured data path holds the preprocessed data.
    data_dir = os.path.join(
        list(get_data_paths().values())[0], "lung/JSRT/preprocessed/")
    logging.info('DATA DIR = ' + data_dir)
    output_path = get_outputs_path()

    self.loss_function = param[0]
    self.network = param[1]
    self.routing_type = param[2]
    self.batch_size = 1
    self.learning_rates = [1, 1]
    self.max_iter = 300000
    self.test_iter = 10000
    self.disp_iter = 100
    self.snapshot_iter = self.test_iter
    self.test_initialization = False
    self.current_iter = 0
    self.num_labels = 6
    # WARNING: Capsule might not work with channels-last!
    self.data_format = 'channels_first'
    self.channel_axis = 1
    self.save_debug_images = False
    self.base_folder = data_dir  # input folder
    self.image_size = [128, 128]
    self.image_spacing = [1, 1]
    # Output folder is tagged with the network name and a timestamp.
    self.output_folder = (output_path + self.network.__name__ + '_' +
                          self.output_folder_timestamp())
    self.dataset = Dataset(image_size=self.image_size,
                           image_spacing=self.image_spacing,
                           num_labels=self.num_labels,
                           base_folder=self.base_folder,
                           data_format=self.data_format,
                           save_debug_images=self.save_debug_images)
    self.dataset_train = self.dataset.dataset_train()
    self.dataset_train.get_next()
    self.dataset_val = self.dataset.dataset_val()

    # One dice summary placeholder per label.
    self.dice_names = list(
        map(lambda x: 'dice_{}'.format(x), range(self.num_labels)))
    self.additional_summaries_placeholders_val = dict([
        (name, create_summary_placeholder(name)) for name in self.dice_names
    ])

    # Bug fix: the original compared strings with 'is' (object identity),
    # which is not guaranteed even for equal literals; use '==' instead.
    if self.network.__name__ == 'network_ud':
        self.net_file = './Lung_Segmentation/LungSeg/cnn_network.py'
    elif self.network.__name__ == 'SegCaps_multilabels':
        self.net_file = './Lung_Segmentation/LungSeg/SegCaps/SegCaps.py'
    else:
        self.net_file = './Lung_Segmentation/LungSeg/capsule_network.py'
    self.files_to_copy = ['main_train_and_test.py', self.net_file]
def main(args): """ Runs dataLayer processing scripts to turn raw dataLayer from (../raw) into cleaned dataLayer ready to be analyzed (saved in ../processed). """ ## Talk to Rune about how dataLayer is handle. config = TrainingConfig() config = update_config(args, config) ## For polyaxon if config.run_polyaxon: input_root_path = Path(get_data_paths()['data']) #'data' output_root_path = Path(get_outputs_path()) inpainting_data_path = input_root_path / 'inpainting' os.environ['TORCH_HOME'] = str(input_root_path / 'pytorch_cache') config.data_path = inpainting_data_path config.output_path = output_root_path config.polyaxon_experiment = Experiment() pathToData = str(input_root_path / '/workspace/data_landset8/testImages') else: pathToData = Path(r"C:\Users\Morten From\PycharmProjects\testDAta") logger = logging.getLogger(__name__) logger.info('making final dataLayer set from raw dataLayer') logger.info(pathToData) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") B_size = 1 beta_test_path_list = glob(str(pathToData) + "/*/") ImageDict = get_dataset(beta_test_path_list, batch_size=B_size) train = ImageDict['train_dataloader'] test = ImageDict['test_dataloader'] genPath = r'C:\Users\Morten From\PycharmProjects\Speciale\Master_Satelite_Image_Inpainting\models\New_400.pth' outputPathImages = Path( r'C:\Users\Morten From\PycharmProjects\Speciale\Master_Satelite_Image_Inpainting\images' ) testGen = UnetGenerator(3, 3, 8) testGen.load_state_dict(torch.load(genPath)) testGen = testGen.to(device) testGen.eval() iterater = 0 for real, SAR in tqdm(train, position=0, leave=True, disable=True): batchOfImages = real.to(device) batchOfImagesSAR = SAR.to(device) outputs = testGen(batchOfImagesSAR) modelHelper.save_tensor_batchSAR( batchOfImages, batchOfImagesSAR, outputs, B_size, Path.joinpath(outputPathImages, 'iter' + str(iterater))) iterater = iterater + 1
def get_sync_dir(file):
    """Sync *file* from the ceph data path to the host path; return the
    synced location.

    Parameters
    ----------
    file : str
        Source data path relative to the ceph mount (surrounding slashes
        are stripped).

    Returns
    -------
    str
        Absolute path of the synced data under the host-path mount.
    """
    # Only `source_data` needs to change between uses.
    source_data = file
    sync_source_dir = os.path.join(get_data_paths()['ceph'],
                                   source_data.strip('/'))
    sync_dest_dir = os.path.join(get_data_paths()['host-path'],
                                 os.path.dirname(source_data.strip('/')))
    # Make sure the destination directory exists so the copy cannot fail.
    # (os.makedirs replaces the original `mkdir -p` subprocess call — same
    # effect, no external process.)
    os.makedirs(sync_dest_dir, exist_ok=True)
    data_dir = os.path.join(get_data_paths()['host-path'],
                            source_data.strip('/'))
    if not os.path.exists(data_dir):
        # --info=progress2 would require rsync >= 3.1.
        cmd_line = "rsync -a {0} {1}".format(sync_source_dir, sync_dest_dir)
        subprocess.call(cmd_line.split())
    return data_dir
def main(args):
    """Resolve model/output locations and run evaluation on the test data."""
    config = update_config(args, TrainingConfig())
    logger = logging.getLogger(__name__)

    if config.run_polyaxon:
        # Polyaxon run: inputs and outputs live on the mounted storage.
        input_root_path = Path(get_data_paths()['data'])
        output_root_path = Path(get_outputs_path())
        inpainting_data_path = input_root_path / 'inpainting'
        # Cache pretrained torch weights on the mounted storage.
        os.environ['TORCH_HOME'] = str(input_root_path / 'pytorch_cache')
        config.data_path = inpainting_data_path
        config.output_path = output_root_path
        modelOutputPath = inpainting_data_path / 'models' / 'OutputModels'
        stores_output_path = config.output_path / 'data' / 'storedData'
    else:
        # Local run: everything sits next to the project folder.
        localdir = Path().absolute().parent
        modelOutputPath = localdir / 'OutputModels'
        stores_output_path = localdir / 'data' / 'storedData'

    # Import the test data and evaluate the stored models.
    evaluator = eval_model(config)
    evaluator.run_eval(modelOutputPath, stores_output_path)
log_level = tf.logging.INFO elif log_level == 'DEBUG': log_level = tf.logging.DEBUG elif log_level == 'WARN': log_level = tf.logging.WARN else: log_level = 'INFO' tf.logging.set_verbosity(log_level) set_logging(get_log_level()) experiment = Experiment() vm_paths = list(get_data_paths().values())[0] data_paths = "{}/SSD/tfrecords".format(vm_paths) checkpointpath = "{}/SSD.checkpoints/ssd_300_vgg.ckpt".format(vm_paths) TRAIN_DIR = get_outputs_path() slim = tf.contrib.slim DATA_FORMAT = 'NHWC' # =========================================================================== # # SSD Network flags. # =========================================================================== # tf.app.flags.DEFINE_float('loss_alpha', 1., 'Alpha parameter in the loss function.')
def main(config):
    """Train a pose-ResNet heatmap model.

    Logs, checkpoints and TensorBoard summaries all go to
    ``config.DATASET.OUTPUT_PATH``; the best-validation and periodic
    checkpoints are written as .pt files.
    """
    logging.basicConfig(level=logging.INFO)
    logging.info("STARTING PROGRAM")
    if config.TRAIN.POLYAXON:
        # Polyaxon run: resolve dataset/model paths from the mounted storage
        # and log to a file inside the experiment's outputs.
        from polyaxon_client.tracking import Experiment, get_data_paths, get_outputs_path
        data_dir = get_data_paths()
        config.DATASET.OUTPUT_PATH = get_outputs_path()
        config.DATASET.PATH = os.path.join(data_dir['data1'],
                                           config.DATASET.PATH_NAS)
        model_path = os.path.join(data_dir['data1'],
                                  config.MODEL.PRETRAINED_NAS)
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        logger.addHandler(
            logging.FileHandler(
                os.path.join(config.DATASET.OUTPUT_PATH,
                             'Heatmaps_from_human_joints.log')))
        # Polyaxon
        experiment = Experiment()
    else:
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        logger.addHandler(
            logging.FileHandler(
                os.path.join(config.DATASET.OUTPUT_PATH,
                             'Heatmaps_Resnet101.log')))
        model_path = config.MODEL.PRETRAINED
    trainloader, valloader = utils.load_split_train_val(
        config.DATASET.PATH, "train", "validation", config)
    print('batch size', config.TRAIN.BATCH_SIZE)
    print('dataset', config.DATASET.PATH_NAS)
    print("weights", config.TRAIN.UPDATE_WEIGHTS)
    print("Model: ", model_path)
    print("LR: ", config.TRAIN.LR)
    model = utils.model_pose_resnet.get_pose_net(model_path, is_train=True)
    model.eval()
    # Freeze/unfreeze the backbone according to config, but always train the
    # deconv and final layers.
    for name, parameter in model.named_parameters():
        parameter.requires_grad = config.TRAIN.UPDATE_WEIGHTS
        if "deconv" in name or "final" in name:
            parameter.requires_grad = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    optimizer = optim.Adam(model.parameters(), lr=config.TRAIN.LR)
    model.to(device)
    # Decay LR by a factor of 0.1 every 3 epochs
    # NOTE(review): the comment above disagrees with the actual schedule
    # (step_size=20, gamma=0.01) — confirm which is intended.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.01)
    writer = SummaryWriter(config.DATASET.OUTPUT_PATH)
    best_acc = 0
    for epoch in range(config.TRAIN.END_EPOCH):
        criterion = nn.MSELoss()
        logger.info('Epoch {}/{}'.format(epoch, config.TRAIN.END_EPOCH - 1))
        logger.info('-' * 10)
        acc = utils.AverageMeter()
        batch_loss = utils.AverageMeter()
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)
            # print(summary(model, tuple(inputs.size())[1:]))
            logps = model.forward(inputs)
            # NOTE(review): re-created every batch; redundant with the
            # per-epoch instance above.
            criterion = nn.MSELoss()
            loss = criterion(logps, labels.float())
            batch_loss.update(loss.item(), inputs.size(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Heatmap accuracy on the detached predictions.
            _, avg_acc, cnt, pred, target, dists = utils.accuracy(
                logps.detach().cpu().numpy(),
                labels.detach().cpu().numpy(),
                thr=config.TRAIN.THRESHOLD)
            print("Current batch accuracy: ", avg_acc)
            acc.update(avg_acc, cnt)
            print("Batch {} train accurcy: {}, loss: {}".format(
                i, acc.avg, batch_loss.avg))
        writer.add_scalar('Loss/train', float(batch_loss.avg), epoch)
        val_acc = run_val(model, valloader, device, criterion, writer, epoch,
                          config)
        logger.info(
            'Train Loss: {:.4f} Train Acc: {:.4f} Val Acc: {:.4f}'.format(
                batch_loss.avg, acc.avg, val_acc))
        # Keep the best-validation checkpoint, plus periodic snapshots.
        if val_acc > best_acc:
            best_acc = val_acc
            logging.info("best val at epoch: " + str(epoch))
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': batch_loss.avg,
                }, os.path.join(config.DATASET.OUTPUT_PATH, "best_model.pt"))
        if epoch % 250 == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': batch_loss.avg,
                },
                os.path.join(config.DATASET.OUTPUT_PATH,
                             "model" + str(epoch) + ".pt"))
    logger.info('Best val Acc: {:4f}'.format(best_acc))
# Parse CLI flags (parser is defined earlier in the file); use CUDA only when
# requested AND available.
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
# Polyaxon
experiment = Experiment()
# Seed CPU (and GPU when applicable) RNGs for reproducibility.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
model = Network()
if args.cuda:
    model.cuda()
# MNIST data lives under the first Polyaxon-mounted data path.
data_dir = os.path.join(
    list(get_data_paths().values())[0], 'pytorch', 'mnist')
logging.info('Downloading data ...')
train_loader = get_train_loader(data_dir, args.batch_size, args.cuda)
test_loader = get_test_loader(data_dir, args.test_batch_size, args.cuda)
optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum)
logging.info('Start training ...')
# Epochs are 1-indexed here.
for epoch in range(1, args.epochs + 1):
    train(model=model,
          train_loader=train_loader,
          epoch=epoch,
          cuda=args.cuda,
          optimizer=optimizer,
          log_interval=args.log_interval)
def main():
    """PyTorch MNIST training entry point with Polyaxon tracking and
    checkpoint/state resume support."""
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for training (default: 1000)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=15, metavar='N',
                        help='number of epochs to train (default: 9)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=42, metavar='S',
                        help='random seed (default: 42)')
    args = parser.parse_args()
    experiment = Experiment()
    logger = logging.getLogger('main')
    logger.setLevel(get_log_level())
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    logger.info('%s', device)
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    # MNIST loaders rooted at the Polyaxon-mounted 'mnist' data path.
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        get_data_paths()['mnist'],
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        get_data_paths()['mnist'],
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)
    model = Net().to(device)
    # Resume support: model weights and last-epoch state live in the outputs.
    model_path = os.path.join(get_outputs_path(), 'model.p')
    state_path = os.path.join(get_outputs_path(), 'state.json')
    start = 1
    if os.path.isfile(model_path):
        model.load_state_dict(torch.load(model_path))
        logger.info('%s', 'Model Loaded')
    if os.path.isfile(state_path):
        with open(state_path, 'r') as f:
            data = json.load(f)
            # NOTE(review): the state file stores the last completed epoch,
            # so resuming AT that value re-runs it once — confirm intended.
            start = data['epoch']
            logger.info('%s', 'State Loaded')
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    with SummaryWriter(log_dir=get_outputs_path()) as writer:
        for epoch in range(start, args.epochs + 1):
            train(epoch, writer, experiment, args, model, device,
                  train_loader, optimizer)
            test(epoch, writer, experiment, args, model, device, test_loader)
            # Checkpoint after every epoch so the run can resume.
            torch.save(model.state_dict(), model_path)
            with open(state_path, 'w') as f:
                data = {'epoch': epoch}
                json.dump(data, f)
def main(args): """ Runs dataLayer processing scripts to turn raw dataLayer from (../raw) into cleaned dataLayer ready to be analyzed (saved in ../processed). """ ## Talk to Rune about how dataLayer is handle. config = TrainingConfig() config = update_config(args, config) ## For polyaxon if config.run_polyaxon: input_root_path = Path(get_data_paths()['data']) #'data' output_root_path = Path(get_outputs_path()) inpainting_data_path = input_root_path / 'inpainting' os.environ['TORCH_HOME'] = str(input_root_path / 'pytorch_cache') config.data_path = inpainting_data_path config.output_path = output_root_path config.polyaxon_experiment = Experiment() pathToData = str(input_root_path / '/workspace/data_landset8/testImages') else: pathToData = Path(r"C:\Users\Morten From\PycharmProjects\testDAta") testPathData = Path( r'/workspace/data_landset8/unzipped/GrassCrops/BC/LC81820302014180LGN00' ) #S1A_20201005_034656_DSC_109_RGBsar_cog.tif #S2B_MSIL2A_20201002T090719_N0214_R050_T35TMH_20201002T113443_B02_cog #S2B_MSIL2A_20201002T090719_N0214_R050_T35TMH_20201002T113443_B03_cog.tif #S2B_MSIL2A_20201002T090719_N0214_R050_T35TMH_20201002T113443_B04_cog.tif logger = logging.getLogger(__name__) logger.info('making final dataLayer set from raw dataLayer') logger.info(pathToData) ImageDict = get_dataset(pathToData, batch_size=config.batch_size) train = ImageDict['train_dataloader'] test = ImageDict['test_dataloader'] #Kører begge på Wgan loop lige nu if config.model_name == 'PartialConvolutions': curtraingModel = trainInpaintingWgan(train, test, generator, discriminator, config) local_model_path = curtraingModel.trainGAN() elif config.model_name == 'PartialConvolutionsWgan': curtraingModel = trainInpaintingWgan(train, test, generator, criticWgan, config) local_model_path = curtraingModel.trainGAN() #local_model_path = Path(r"C:\Users\panda\PycharmProjects\Image_Inpainting_Sat\Master_Satelite_Image_Inpainting\OutputModels\PartialConvolutionsWgan_200.pt") if config.run_polyaxon: 
model_path = inpainting_data_path / 'models' modelOutputPath = Path.joinpath(model_path, 'OutputModels') stores_output_path = config.output_path / 'data' / 'storedData' else: localdir = Path().absolute().parent modelOutputPath = Path.joinpath(localdir, 'OutputModels') stores_output_path = localdir / 'data' / 'storedData' curevalModel = eval_model(config) curevalModel.run_eval(modelOutputPath, stores_output_path, model_path=local_model_path, test_dataloader=test)
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.003,
              val_percent=0.20,
              loss_lambda=5,
              save_cp=True,
              gpu=False,
              img_scale=0.5,
              expositions_num=15,
              logg_freq=15,
              tb=False,
              w_decay=0.0005,
              use_notifications=False,
              polyaxon=False,
              outputs_path='checkpoints'):
    """Train *net* on the (US)LDR dataset with the ExpandNet loss and save
    the best checkpoint (by PSNR-HVS-M on validation).

    Parameters
    ----------
    net : torch.nn.Module
        Network to train.
    epochs, batch_size, lr, w_decay : numbers
        Standard training hyper-parameters (Adagrad optimizer).
    val_percent : float
        Fraction of the ids held out for validation.
    loss_lambda : number
        Weight passed to ExpandNetLoss.
    save_cp : bool
        Save 'BestCP.pth' whenever validation PSNR-HVS-M improves.
    gpu : bool
        Move batches to CUDA when True.
    img_scale : float
        # NOTE(review): accepted but never used in this body.
    expositions_num : int
        Number of expositions per image id.
    logg_freq : int
        Print training stats every `logg_freq` steps.
    tb : bool
        Emit TensorBoard scalars/images (uses the module-level `writer`).
    use_notifications : bool
        Send a Pushbullet note (module-level `pb`) when training ends.
    polyaxon : bool
        Resolve paths via Polyaxon and log metrics to the module-level
        `experiment`.
    outputs_path : str
        Checkpoint folder name for local (non-Polyaxon) runs.
    """
    # === Localize training data ===================================================
    if polyaxon:
        data_paths = get_data_paths()
        dir_checkpoints = get_outputs_path()
        dataSets_dir = os.path.join(data_paths['data1'], 'eprado',
                                    'USLDR-DataSet')
        #dataSets_dir = os.path.join(data_paths['data1'] , 'eprado', 'LDR_DataSet')
    else:
        dataSets_dir = os.path.join(wk_dir, "LDR_DataSet")
        dir_checkpoints = os.path.join(wk_dir, outputs_path)
    print('Dataset_dir', dataSets_dir)
    print('Outputs_path', dir_checkpoints)
    # Experiment name embeds timestamp + key hyper-parameters.
    experiment_id = datetime.datetime.now().strftime('%d%m_%H%M_')
    experiment_name = 'ExpandnetL_psn_{}_bs{}_lr{}_exps{}'.format(
        experiment_id, batch_size, lr, expositions_num)
    dir_img = os.path.join(dataSets_dir, 'Org_images/')
    dir_compressions = os.path.join(dataSets_dir, 'c_images/')
    dir_mask = os.path.join(dataSets_dir, 'c_images/')
    #if tb:
    #dummy_input = torch.rand(1, 3, 128, 128)
    #writer.add_graph(net, (dummy_input,))
    #writer.close()
    # === Load Training/Validation data =====================================================
    ids = get_ids(dir_compressions)
    # Split into train test
    idsset = list(ids)
    # NOTE(review): KFold is constructed but never used below.
    kf = KFold(n_splits=5, shuffle=False)
    #print('Train splits: ',kf.get_n_splits(dataset))
    best_psnr_m = 0
    best_psnr_hvs = 0
    #for train_index, test_index in kf.split(idsset):
    iddataset = split_train_val(idsset, expositions_num, val_percent)
    #test_set = []
    #for im_id in test_index:
    #    for e in range(expositions_num):
    #        test_set.append(idsset[im_id])
    N_train = len(iddataset['train'])
    N_val = len(iddataset['val'])
    N_test = 0  #len(test_set)
    #=====CHOOSE Loss Criterion=============================================================
    #criterion = nn.MSELoss(reduction='mean')
    criterion = ExpandNetLoss(loss_lambda=loss_lambda)
    optimizer = optim.Adagrad(net.parameters(),
                              lr=lr,
                              lr_decay=0.000001,
                              weight_decay=w_decay)
    #optimizer = optim.SGD(net.parameters(),
    #                      lr=lr,
    #                      momentum=0.9,
    #                      weight_decay=0.0005)
    since = time.time()
    print('''
    Training SETUP:
    Epochs: {0:}
    Batch size: {1:}
    Optimizer: Adagrad
    Learning rate: {2:}
    Weight decay: {3:}
    Training size: {4:}
    Validation size: {5:}
    Test size: {6:}
    Checkpoints: {7:}
    CUDA: {8:}
    '''.format(epochs, batch_size, lr, w_decay, N_train, N_val, N_test,
               str(save_cp), str(gpu)))
    train_dataset = HdrDataset(iddataset['train'], dir_compressions, dir_mask,
                               expositions_num)
    val_dataset = HdrDataset(iddataset['val'], dir_compressions, dir_mask,
                             expositions_num)
    #test_dataset = HdrDataset(test_set, dir_compressions, dir_mask,expositions_num)
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   drop_last=False)
    val_data_loader = DataLoader(val_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 drop_last=False)
    #test_data_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=True)
    best_hvsm = 0.0
    global_psnr_m = []
    global_psnr_hvs = []
    for epoch in range(epochs):
        print('\n')
        print('{}{}{}'.format('+', '=' * 78, '+'))
        print('| Starting epoch {}/{}. {}'.format(epoch + 1, epochs,
                                                  (' ' * 57) + '|'))
        print('{}{}{}'.format('|', '-' * 78, '|'))
        begin_of_epoch = time.time()
        tot_steps = math.trunc(N_train / batch_size)
        net.train()
        train_loss = 0
        losses = []
        val_loss = 0
        step = 0
        train_sample = []
        train_acc = 0
        val_hvsm = 0
        val_hvs = 0
        model_pnsr_m = 0
        for i, b in enumerate(train_data_loader):
            step += 1
            imgs, true_masks, imgs_ids = b['input'], b['target'], b['id']
            #print(i, b['input'].size(), b['target'].size())
            #input: [15, 3, 224, 224]), target: [15, 3, 224, 224]
            #print('>>>>>>> Input max: ' , torch.max(imgs[0]))
            #print('>>>>>>> mask max : ', torch.max(true_masks[0]))
            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()
            else:
                print(' GPU not available')
            # Predicted mask images
            optimizer.zero_grad()
            prediction = net(imgs)
            #prediction shape: [B, 3, 224, 224]
            #cost, cost_input_output = Hdr_loss(imgs, true_masks, prediction, sep_loss=False, gpu=gpu, tb=tb)
            cost = criterion(prediction, true_masks)  #loss is torch tensor
            losses.append(cost.item())
            # Running mean of all step losses so far this epoch.
            train_loss = np.mean(losses)
            cost.backward()
            optimizer.step()
            if step == 1 or step % logg_freq == 0:
                #print('| Step: {0:}, cost:{1:}, Train Loss:{2:.9f}, Train Acc:{3:.9f}'.format(step,cost, train_loss,train_acc/step))
                print('| Step: {0:}, cost:{1:}, Train Loss:{2:.9f}'.format(
                    step, cost, train_loss))
            #Last Step of this Epoch
            if step == math.trunc(tot_steps):
                # Save one random sample (input, target, prediction) from the
                # final batch of the epoch for inspection.
                num_in_batch = random.randrange(imgs.size(0))
                train_sample_name = imgs_ids[num_in_batch]
                train_sample = [
                    imgs[num_in_batch], true_masks[num_in_batch],
                    prediction[num_in_batch]
                ]
                t_exp_name = 'Train_' + experiment_name
                saveTocheckpoint(dir_checkpoints, t_exp_name,
                                 train_sample_name, epoch, train_sample[0],
                                 train_sample[1], train_sample[2])
                if tb:
                    print(
                        '| saving train step {0:} sample : input,target & pred'
                        .format(step))
                    grid = torchvision.utils.make_grid(train_sample, nrow=3)
                    writer.add_image('train_sample', grid, 0)
        #if epoch == 1 or epoch % 15 == 0 or epoch == epochs:
        val_loss, val_hvsm, val_hvs = eval_hdr_net(net,
                                                   dir_checkpoints,
                                                   experiment_name,
                                                   val_data_loader,
                                                   criterion,
                                                   epoch,
                                                   gpu,
                                                   batch_size,
                                                   expositions_num=15,
                                                   tb=tb)
        if tb:
            writer.add_scalar('training_loss: ', train_loss, epoch)
            writer.add_scalar('validation_loss', val_loss, epoch)
            writer.add_scalar('val_hvsm', val_hvsm, epoch)
            writer.add_scalar('val_hvs', val_hvs, epoch)
            writer.add_scalars('losses', {
                'training_loss': train_loss,
                'val_loss': val_loss
            }, epoch)
        if polyaxon:
            # NOTE(review): `experiment` is a module-level name, only valid
            # when running under Polyaxon — confirm it is defined there.
            experiment.log_metrics(step=epoch,
                                   training_loss=train_loss,
                                   validation_loss=val_loss,
                                   val_hvsm=val_hvsm,
                                   val_hvs=val_hvs)
        print('{}{}{}'.format('+', '=' * 78, '+'))
        print('| {0:} Epoch {1:} finished ! {2:}|'.format(
            ' ' * 28, (epoch + 1), ' ' * 29))
        print('{}{}{}'.format('+', '-' * 78, '+'))
        print('| Summary: Train Loss: {0:0.07}, Val Loss:{1:}'.format(
            train_loss, val_loss))
        print('| Avrg psnr-hvs_m :{0:0.04},Avrg psnr-hvs :{1:0.04}'.format(
            val_hvsm, val_hvs))
        time_epoch = time.time() - begin_of_epoch
        print('| Epoch ETC: {:.0f}m {:.0f}s'.format(time_epoch // 60,
                                                    time_epoch % 60))
        print('{}{}{}'.format('+', '=' * 78, '+'))
        # Checkpoint whenever validation PSNR-HVS-M improves.
        if save_cp and (val_hvsm > best_hvsm):
            best_hvsm = val_hvsm
            model_path = os.path.join(dir_checkpoints, 'BestCP.pth')
            torch.save(net.state_dict(), model_path)
            print('Checkpoint saved !')
        global_psnr_hvs.append(val_hvs)
        global_psnr_m.append(val_hvsm)
        '''
        test_psnr_m, test_psnr_hvs = test_hdr_net(model_path,dir_checkpoints, experiment_name, test_data_loader, criterion,gpu,tb)
        if save_cp and (test_psnr_m > best_psnr_m):
            best_psnr_m = test_psnr_m
            best_model_path = os.path.join(dir_checkpoints, 'Best_CP.pth')
            torch.save(net.state_dict(),best_model_path)
            print('Best model saved !')
        '''
    print('>' * 80)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Average psnr-hvs_m: {:.0f}, psnr-hvs: {:.0f}'.format(
        np.mean(global_psnr_m), np.mean(global_psnr_hvs)))
    if tb:
        writer.close()
    if use_notifications:
        end_msg = "train.py finished at: {}(".format(
            str(datetime.datetime.now()))
        push = pb.push_note("usHDR: Finish", end_msg)
# Final CLI flag of the parser configured earlier in the file.
parser.add_argument('--batch-norm-epsilon',
                    type=float,
                    default=1e-5,
                    help='Epsilon for batch norm.')
args = parser.parse_args()
# Validate the GPU / layer-count / batch-size combination before training.
if args.num_gpus < 0:
    raise ValueError(
        'Invalid GPU count: \"--num-gpus\" must be 0 or a positive integer.'
    )
if args.num_gpus == 0 and args.variable_strategy == 'GPU':
    raise ValueError(
        'num-gpus=0, CPU must be used as parameter server. Set'
        '--variable-strategy=CPU.')
# ResNet depth must satisfy depth = 6n + 2.
if (args.num_layers - 2) % 6 != 0:
    raise ValueError('Invalid --num-layers parameter.')
if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
    raise ValueError('--train-batch-size must be multiple of --num-gpus.')
if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
    raise ValueError('--eval-batch-size must be multiple of --num-gpus.')
# Polyaxon: the first configured data path holds the CIFAR-10 records.
data_dir = os.path.join(
    list(get_data_paths().values())[0], 'cifar-10-data')
# We create data for the project if it does not exists
if not os.path.exists(os.path.join(data_dir, 'train.tfrecords')):
    generate_data(data_dir)
# Polyaxon: training outputs go to the experiment's outputs path.
train(job_dir=get_outputs_path(), data_dir=data_dir, **vars(args))
def main(args):
    """Run the baseline inpainting experiment and evaluate its output.

    Generates baseline-inpainted images for the test set, then scores them
    against the real images with FID plus per-image MAE, SDD, SSIM (torch
    and scikit variants), PSNR, CC and RMSE, and writes every aggregate to
    a text file via ``saveEvalToTxt``.

    Parameters
    ----------
    args : argparse.Namespace
        Command-line overrides merged into ``TrainingConfig`` by
        ``update_config``.
    """
    ## Talk to Rune about how dataLayer is handle.
    config = TrainingConfig()
    config = update_config(args,config)
    logger = logging.getLogger(__name__)
    logger.info('making final dataLayer set from raw dataLayer')
    ## For polyaxon: redirect data/output paths to the mounted cloud storage.
    if config.run_polyaxon:
        input_root_path = Path(get_data_paths()['data'])
        output_root_path = Path(get_outputs_path())
        inpainting_data_path = input_root_path / 'inpainting'
        # Keep the torch hub/model cache on the data mount instead of $HOME.
        os.environ['TORCH_HOME'] = str(input_root_path / 'pytorch_cache')
        config.data_path = inpainting_data_path
        config.output_path = output_root_path
        config.polyaxon_experiment = Experiment()
    curdatLayer = importData(config)
    train_array,names = curdatLayer.get_images_for_baseLine()
    print("Total test in baseline " +str(len(train_array)))
    print("Total test names in baseline" +str(len(names)))
    train_dataloader,test_dataloader = curdatLayer.getRGBDataLoader()
    # The baseline model expects float32 images, so convert every input.
    local_train_array = []
    for i in train_array:
        local_train_array.append(convertToFloat32(i))
    train_array = local_train_array
    curBaseLineModel = baselineModel(train_array,names,config)
    pathToGenerated, time_ran = curBaseLineModel.baselineExperiment()
    #pathToGenerated = r"C:\Users\panda\PycharmProjects\Image_Inpainting_Sat\Master_Satelite_Image_Inpainting\data\generated\test_baseLine\22_11_2020_13_01_28"
    if config.run_polyaxon:
        pathToEval=config.output_path /'evalMetrics'
    else:
        pathToEval = Path().absolute().parent / 'models'
    # create dataloader with generated images
    generated_images_dataloader = curdatLayer.getGeneratedImagesDataloader(pathToGenerated)
    print(str(pathToGenerated) + "is the generated image path")
    # calculate FID
    #missing gen
    print("Done with generating images")
    FID_Value = FIDCalculator(test_dataloader, generated_images_dataloader,
                              len(test_dataloader) * config.batch_size,
                              config.batch_size,config).get_FID_scores()
    # Calculate PSNR and SSIM (plus the other per-image metrics below).
    dataloader_iterator = iter(generated_images_dataloader)
    maeValues = []
    sddValues = []
    ssimscikitValues= []
    SSIMValues = []
    psnrValues = []
    CCValues = []
    rmseValues = []
    # loop to calculate PSNR and SSIM for all test and generated images.
    for images_real in test_dataloader:
        try:
            images_generated = next(dataloader_iterator)
        except StopIteration:
            # The generated set can be shorter than the test set; wrap around.
            dataloader_iterator = iter(generated_images_dataloader)
            images_generated = next(dataloader_iterator)
        for index2 in range(config.batch_size):
            # Per-image metrics between a real image and its generated twin.
            # NOTE(review): maeValues is filled by MSE() — confirm whether MAE
            # or MSE is the intended metric here.
            psnrValues.append(PSNR().__call__(images_real[index2], images_generated[index2]))
            CCValues.append(CC().__call__(images_real[index2], images_generated[index2]))
            maeValues.append(MSE().__call__(images_real[index2], images_generated[index2]))
            sddValues.append(SDD.__call__(images_real[index2], images_generated[index2]))
            ssimscikitValues.append(SSIM_SKI.__call__(images_real[index2], images_generated[index2]))
            # torch ssim expects a batch dimension, hence the unsqueeze.
            image1 = images_real[index2].unsqueeze(0)
            image2 = images_generated[index2].unsqueeze(0)
            SSIMValues.append(ssim(image1, image2))
            rmseValues.append(RMSE.__call__(images_real[index2], images_generated[index2]))
    # Aggregate every metric list to its mean/min/max.
    meanMAE = sum(maeValues) / len(maeValues)
    minMAE = min(maeValues)
    maxMAE = max(maeValues)
    meanSDD = sum(sddValues) / len(sddValues)
    minSDD = min(sddValues)
    maxSDD = max(sddValues)
    meanPSNR = sum(psnrValues) / len(psnrValues)
    minPSNR = min(psnrValues)
    maxPSNR = max(psnrValues)
    meanSSIM = sum(SSIMValues) / len(SSIMValues)
    minSSIM = min(SSIMValues)
    maxSSIM = max(SSIMValues)
    meanSCISSIM = sum(ssimscikitValues) / len(ssimscikitValues)
    minSCISSIM = min(ssimscikitValues)
    maxSCISSIM = max(ssimscikitValues)
    meanCC = sum(CCValues) / len(CCValues)
    minCC = min(CCValues)
    maxCC = max(CCValues)
    meanRMSE = sum(rmseValues) / len(rmseValues)
    minRMSE = min(rmseValues)
    maxRMSE = max(rmseValues)
    # Save final results of evaluation metrics
    FID = FID_Value
    if not pathToEval.parent.exists():
        pathToEval.parent.mkdir()
    #saveEvalToTxt(config.model_name,meanPSNR.item(),minPSNR,maxPSNR,meanSSIM.item(), minSSIM,maxSSIM ,FID ,time, pathToEval)
    saveEvalToTxt(config.model_name, meanMAE, minMAE, maxMAE, meanSDD, minSDD,
                  maxSDD, meanSSIM.item(), minSSIM.item(), maxSSIM.item(),
                  meanSCISSIM,minSCISSIM,maxSCISSIM, meanPSNR, minPSNR,
                  maxPSNR, meanCC, minCC, maxCC, meanRMSE, minRMSE, maxRMSE,
                  FID_Value, time_ran, pathToEval)
help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) model = Network() if args.cuda: model.cuda() if settings.IN_CLUSTER: data_dir = get_data_paths()['data'] else: data_dir = '/tmp/plx/data' experiment = Experiment() logging.info('Downloading data ...') train_loader = get_train_loader(data_dir, args.batch_size, args.cuda) test_loader = get_test_loader(data_dir, args.test_batch_size, args.cuda) optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) logging.info('Start training ...') for epoch in range(1, args.epochs + 1): train(model=model, train_loader=train_loader, epoch=epoch, cuda=args.cuda, optimizer=optimizer,
def main():
    """Fetch both MNIST splits into the Polyaxon-provided data path.

    Instantiating ``datasets.MNIST`` with ``download=True`` triggers the
    download (or reuses the on-disk cache) for each split; the dataset
    objects themselves are not needed afterwards.
    """
    for train_split in (True, False):
        datasets.MNIST(get_data_paths()['mnist'],
                       train=train_split,
                       download=True)
def main(args): """ Runs dataLayer processing scripts to turn raw dataLayer from (../raw) into cleaned dataLayer ready to be analyzed (saved in ../processed). """ ## Talk to Rune about how dataLayer is handle. config = TrainingConfig() config = update_config(args, config) ## For polyaxon if config.run_polyaxon: input_root_path = Path(get_data_paths()['data']) output_root_path = Path(get_outputs_path()) inpainting_data_path = input_root_path / 'inpainting' os.environ['TORCH_HOME'] = str(input_root_path / 'pytorch_cache') config.data_path = inpainting_data_path config.output_path = output_root_path config.polyaxon_experiment = Experiment() logger = logging.getLogger(__name__) logger.info('making final dataLayer set from raw dataLayer') curdatLayer = importData(config) if config.nir_data: train, test_dataloader = curdatLayer.getNIRDataLoader() else: train, test_dataloader = curdatLayer.getRGBDataLoader() local_model_path = r"C:\Users\panda\PycharmProjects\Image_Inpainting_Sat\Master_Satelite_Image_Inpainting\OutputModels\PartialConvolutionsWgan_301.pt" local_output_path = Path(r"E:\Speciale\final_model") #gen = Wgangenerator().to(config.device) if config.nir_data: gen = generatorNIR().to(config.device) else: gen = generator().to(config.device) gen.load_state_dict( torch.load(local_model_path)) ## Use epochs to identify model number gen.eval() loadAndAgumentMasks = makeMasks.MaskClass(config, rand_seed=None, evaluation=True, noFlip=True) names = [] # Find names of test images, in order to save the generated files with same name, for further reference localImg = test_dataloader.dataset.image_list # Slice string to only include the name of the file, ie after the last // localNames = [] # if self.config.run_polyaxon: # split_path = localImg[0].split('/') ##Linux # else: # split_path = localImg[0].split("\\") # local_index= split_path.index('processed') # local_country= split_path[local_index+1] for i in localImg: if config.run_polyaxon: selected_image = i.split('/')[-1] 
##Linux else: selected_image = i.split("\\")[-1] localNames.append(selected_image) names = names + localNames print("Found this many names " + str(len(names))) current_number = 0 if not os.path.exists(Path.joinpath(local_output_path, config.model_name)): os.makedirs(Path.joinpath(local_output_path, config.model_name)) now = datetime.now() dt_string = now.strftime("%d_%m_%Y_%H_%M_%S") local_test_path = local_output_path / config.model_name / dt_string / 'Data' local_test_nir_path = local_output_path / config.model_name / dt_string / 'DataNir' local_store_path = local_output_path / config.model_name / dt_string / 'stored_Data' os.makedirs(local_test_path) if config.nir_data: os.makedirs(local_test_nir_path) start_time = datetime.now() for real in tqdm(test_dataloader, disable=config.run_polyaxon): masks = loadAndAgumentMasks.returnTensorMasks(config.batch_size) masks = torch.from_numpy(masks) masks = masks.type(torch.cuda.FloatTensor) masks = 1 - masks masks.to(config.device) real = real.to(config.device) fake_masked_images = torch.mul(real, masks) generated_images = gen(fake_masked_images, masks) image_names = names[current_number:current_number + config.batch_size] current_number = current_number + config.batch_size ## Change naming to include all names # modelHelper.save_tensor_batch(generated_images,fake_masked_images,config.batch_size,path) if config.nir_data: for index, image in enumerate(generated_images): namePath = Path.joinpath(local_test_path, image_names[index]) if config.nir_data: modelHelper.save_tensor_single_NIR( image, Path.joinpath(local_test_path, image_names[index]), Path.joinpath(local_test_nir_path, image_names[index]), raw=True) else: modelHelper.save_tensor_batch( real, fake_masked_images, generated_images, config.batch_size, Path.joinpath(local_test_path, "_final_model_" + str(current_number))) current_number = current_number + 1 end_time = datetime.now()
def main(args):
    """Evaluate a trained generator checkpoint on the RGB test set.

    Resolves data/model/output paths (Polyaxon cluster or local), loads the
    ``<model_name>_<epochs>.pt`` generator checkpoint, inpaints a few masked
    test batches, saves the generated images, computes FID plus per-image
    MAE, SDD, SSIM, PSNR and CC, writes the summary with ``saveEvalToTxt``
    and finally cleans the temporary image folder.

    Parameters
    ----------
    args : argparse.Namespace
        Command-line overrides merged into ``TrainingConfig``.

    Raises
    ------
    FileNotFoundError
        If the generator checkpoint does not exist.
    """
    config = TrainingConfig()
    config = update_config(args, config)
    logger = logging.getLogger(__name__)
    if config.run_polyaxon:
        # Cluster run: inputs/outputs live on the mounted Polyaxon volumes.
        input_root_path = Path(get_data_paths()['data'])
        output_root_path = Path(get_outputs_path())
        inpainting_data_path = input_root_path / 'inpainting'
        # Keep the torch cache on the data mount instead of $HOME.
        os.environ['TORCH_HOME'] = str(input_root_path / 'pytorch_cache')
        config.data_path = inpainting_data_path
        config.output_path = output_root_path
        imageOutputPath = config.data_path / 'data' / 'generated'
        model_path = inpainting_data_path / 'models'
        modelOutputPath = Path.joinpath(model_path, 'OutputModels')
        stores_output_path = config.output_path / 'data' / 'storedData'
    else:
        imageOutputPath = Path().absolute().parent / 'data' / 'generated'
        localdir = Path().absolute().parent
        modelOutputPath = Path.joinpath(localdir, 'OutputModels')
        stores_output_path = localdir / 'data' / 'storedData'
    #Import test data
    test = eval_model(config)
    test.run_eval(modelOutputPath, stores_output_path)
    curdatLayer = importData(config)
    train, test_dataloader = curdatLayer.getRGBDataLoader()
    del train  # only the test split is used here
    # BUG FIX: 'test' was reused for both the eval_model instance and this
    # checkpoint path; a dedicated name keeps both usable.
    checkpoint_path = Path.joinpath(
        modelOutputPath, config.model_name + '_' + str(config.epochs) + '.pt')
    print(checkpoint_path)
    if Path.exists(checkpoint_path):
        ##Hvis det er med wgan generator, altså layernorm, indsæt Wgangenerator istedet for generator()
        # (English: if trained with the WGAN/layernorm generator, substitute
        # Wgangenerator for generator() below.)
        gen = generator().to(config.device)
        ## Use epochs to identify model number
        gen.load_state_dict(torch.load(checkpoint_path))
    else:
        # BUG FIX: the original only printed a warning here and then crashed
        # with a NameError on 'gen'; fail fast with a clear error instead.
        raise FileNotFoundError(
            'Unable to find path to model: ' + str(checkpoint_path))
    gen.eval()
    loadAndAgumentMasks = makeMasks.MaskClass(config,
                                              rand_seed=None,
                                              evaluation=True)
    names = []
    # Find names of test images, in order to save the generated files with
    # same name, for further reference.
    for dataset_index in range(len(test_dataloader.dataset.datasets)):
        localImg = test_dataloader.dataset.datasets[dataset_index].image_list
        # Slice string to only include the name of the file, ie after the last //
        localNames = []
        for img_path in localImg:  # renamed: the original shadowed the outer loop index
            if config.run_polyaxon:
                selected_image = img_path.split('/')[-1]  ##Linux
            else:
                selected_image = img_path.split("\\")[-1]
            localNames.append(selected_image)
        names = names + localNames
    print("Found this many names " + str(len(names)))
    current_number = 0
    if not os.path.exists(Path.joinpath(imageOutputPath, config.model_name)):
        os.makedirs(Path.joinpath(imageOutputPath, config.model_name))
    # Timestamped folders so repeated runs never overwrite each other.
    now = datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H_%M_%S")
    local_test_path = imageOutputPath / config.model_name / dt_string / 'Data'
    local_store_path = stores_output_path / config.model_name / dt_string / 'stored_Data'
    os.makedirs(local_test_path)
    os.makedirs(local_store_path)
    start_time = datetime.now()
    # NOTE: testCount deliberately limits the run to the first few batches.
    testCount = 3
    for real in tqdm(test_dataloader):
        masks = loadAndAgumentMasks.returnTensorMasks(config.batch_size)
        masks = torch.from_numpy(masks)
        masks = masks.type(torch.cuda.FloatTensor)
        masks = 1 - masks  # invert so 1 = keep pixel, 0 = hole
        # BUG FIX: Tensor.to() is not in-place; the original discarded the
        # result of 'masks.to(config.device)'.
        masks = masks.to(config.device)
        real = real.to(config.device)
        fake_masked_images = torch.mul(real, masks)
        generated_images = gen(fake_masked_images, masks)
        image_names = names[current_number:current_number + config.batch_size]
        current_number = current_number + config.batch_size
        for index, image in enumerate(generated_images):
            modelHelper.save_tensor_single(
                image,
                Path.joinpath(local_test_path, image_names[index]),
                raw=True)
        if testCount < 0:
            break
        testCount = testCount - 1
    print("Saved image to " + str(local_test_path))
    end_time = datetime.now()
    time_ran = str(end_time - start_time)
    #create dataloader with generated images
    generated_images_dataloader = curdatLayer.getGeneratedImagesDataloader(local_test_path)
    #calculate FID
    FID_Value = FIDCalculator(test_dataloader, generated_images_dataloader,
                              len(test_dataloader) * config.batch_size,
                              config.batch_size, config).get_FID_scores()
    #Calculate PSNR and SSIM (plus MAE, SDD and CC)
    dataloader_iterator = iter(generated_images_dataloader)
    psnrValues = []
    maeValues = []
    sddValues = []
    SSIMValues = []
    CCValues = []
    #loop to calculate PSNR and SSIM for all test and generated images.
    for images_real in test_dataloader:
        try:
            images_generated = next(dataloader_iterator)
        except StopIteration:
            # Generated set may be shorter than the test set; wrap around.
            dataloader_iterator = iter(generated_images_dataloader)
            images_generated = next(dataloader_iterator)
        for index2 in range(config.batch_size):
            psnrValues.append(PSNR().__call__(images_real[index2], images_generated[index2]))
            # BUG FIX: the CC computation was commented out while
            # sum(CCValues)/len(CCValues) below still ran, guaranteeing a
            # ZeroDivisionError; restored (matches the sibling eval scripts).
            CCValues.append(CC().__call__(images_real[index2], images_generated[index2]))
            maeValues.append(MSE().__call__(images_real[index2], images_generated[index2]))
            sddValues.append(SDD.__call__(images_real[index2], images_generated[index2]))
            # torch ssim expects a batch dimension, hence the unsqueeze.
            image1 = images_real[index2].unsqueeze(0)
            image2 = images_generated[index2].unsqueeze(0)
            SSIMValues.append(ssim(image1, image2))
        # NOTE: only the first batch is scored, mirroring the testCount limit.
        break
    # Aggregate every metric list to mean/min/max.
    meanMAE = sum(maeValues) / len(maeValues)
    minMAE = min(maeValues)
    maxMAE = max(maeValues)
    meanSDD = sum(sddValues) / len(sddValues)
    minSDD = min(sddValues)
    maxSDD = max(sddValues)
    meanPSNR = sum(psnrValues) / len(psnrValues)
    minPSNR = min(psnrValues)
    maxPSNR = max(psnrValues)
    meanSSIM = sum(SSIMValues) / len(SSIMValues)
    minSSIM = min(SSIMValues)
    maxSSIM = max(SSIMValues)
    meanCC = sum(CCValues) / len(CCValues)
    minCC = min(CCValues)
    maxCC = max(CCValues)
    #Save final results of evaluation metrics
    # BUG FIX: the original passed meanSSIM in the SDD slot, duplicated
    # meanSSIM, and passed the PSNR *class* (PSNR.item()) instead of the
    # computed PSNR aggregates; each metric now passes its own statistics.
    saveEvalToTxt(config.model_name,
                  meanMAE, minMAE, maxMAE,
                  meanSDD, minSDD, maxSDD,
                  meanSSIM.item(), minSSIM.item(), maxSSIM.item(),
                  meanPSNR, minPSNR, maxPSNR,
                  meanCC, minCC, maxCC,
                  FID_Value, time_ran, local_store_path)
    #Clean up the temporary generated-image folder.
    modelHelper.clearFolder(local_test_path.parent)
def load_arguments():
    """Parse command-line arguments and derive all data/model/log paths.

    Also configures file + console logging under the resolved ``logDir`` and
    rewrites ``suffix`` to a timestamped tensorboard run name.

    Returns
    -------
    argparse.Namespace
        Fully resolved arguments, including derived ``train_path``,
        ``valid_path``, ``test_path``, ``vocab`` and model output paths.
    """
    argparser = argparse.ArgumentParser(sys.argv[0])
    # data path
    argparser.add_argument('--dataDir', type=str, default='')
    argparser.add_argument(
        '--dataset',
        type=str,
        default='',
        help='if doman_adapt enable, dataset means target dataset')
    argparser.add_argument('--modelDir', type=str, default='')
    argparser.add_argument('--logDir', type=str, default='')
    # general model setting
    argparser.add_argument('--learning_rate', type=float, default=0.0005)
    argparser.add_argument('--batch_size', type=int, default=64)
    argparser.add_argument('--pretrain_epochs',
                           type=int,
                           default=10,
                           help='max pretrain epoch for LM.')
    argparser.add_argument('--max_epochs', type=int, default=20)
    argparser.add_argument('--max_len',
                           type=int,
                           default=20,
                           help='the max length of sequence')
    argparser.add_argument('--noise_word',
                           action='store_true',
                           help='whether add noise in enc batch.')
    argparser.add_argument('--trim_padding',
                           action='store_true',
                           help='whether trim the padding in each batch.')
    argparser.add_argument(
        '--order_data',
        action='store_true',
        help='whether order the data according the length in the dataset.')
    # CNN model
    argparser.add_argument('--filter_sizes', type=str, default='1,2,3,4,5')
    argparser.add_argument('--n_filters', type=int, default=128)
    argparser.add_argument(
        '--confidence',
        type=float,
        default=0.8,
        help='The classification confidence used to filter the data')
    # style transfer model
    argparser.add_argument('--network',
                           type=str,
                           default='',
                           help='The style transfer network path')
    argparser.add_argument(
        '--rho',  # loss_rec + rho * loss_adv
        type=float,
        default=1)
    argparser.add_argument(
        '--gamma_init',  # softmax(logit / gamma)
        type=float,
        default=0.1)
    argparser.add_argument('--gamma_decay', type=float, default=1)
    argparser.add_argument('--gamma_min', type=float, default=0.1)
    argparser.add_argument('--beam', type=int, default=1)
    argparser.add_argument('--dropout_rate', type=float, default=0.5)
    argparser.add_argument('--n_layers', type=int, default=1)
    argparser.add_argument('--dim_y', type=int, default=200)
    argparser.add_argument('--dim_z', type=int, default=500)
    argparser.add_argument('--dim_emb', type=int, default=100)
    # training config
    argparser.add_argument('--suffix', type=str, default='')
    argparser.add_argument('--load_model',
                           action='store_true',
                           help='whether load the model for test')
    argparser.add_argument('--save_model',
                           action='store_true',
                           help='whether save the model for test')
    argparser.add_argument('--train_checkpoint_frequency',
                           type=int,
                           default=4,
                           help='how many checkpoints in one training epoch')
    argparser.add_argument('--training_portion', type=float, default=1.0)
    argparser.add_argument('--source_training_portion', type=float, default=1.0)
    # Multi-dataset support
    argparser.add_argument(
        '--domain_adapt',
        action='store_true',
        help='whether use multidataset for domain-adaptation')
    argparser.add_argument('--source_dataset', type=str, default='yelp')
    argparser.add_argument('--dim_d',
                           type=int,
                           default=50,
                           help='The dimension of domain vector.')
    argparser.add_argument('--alpha',
                           type=float,
                           default=0.0,
                           help='The weight of domain loss.')
    # Yelp/Amazon online dataset for test only
    argparser.add_argument(
        '--online_test',
        action='store_true',
        help='whether to use human annotated sentences to evalute the bleu.')
    argparser.add_argument(
        '--save_samples',
        action='store_true',
        help='whether to save validation samples from the model.')
    argparser.add_argument('--test',
                           action='store_true',
                           help='whether to test model or not.')
    argparser.add_argument('--atp',
                           action='store_true',
                           help='whether to access atp or not.')
    args = argparser.parse_args()
    # check whether use online annotated dataset from human
    if args.dataset in ['yelp', 'amazon']:
        args.online_test = True
    # update data path according to single dataset or multiple dataset
    if args.domain_adapt:
        args = update_domain_adapt_datapath(args)
    else:
        if args.atp:
            # On the cluster, the corpus lives under the shared data-pool mount.
            args.dataDir = get_data_paths()['data-pool'] + '/DAST'
            args.dataDir = os.path.join(args.dataDir, 'data')
        data_root = os.path.join(args.dataDir, args.dataset)
        args.train_path = os.path.join(data_root, 'train')
        args.valid_path = os.path.join(data_root, 'valid')
        args.test_path = os.path.join(data_root, 'test')
        args.vocab = os.path.join(data_root, 'vocab')
    # update output path
    args.modelDir = os.path.join(args.modelDir, 'save_model')
    args.classifier_path = os.path.join(args.modelDir, 'classifier', args.dataset)
    args.lm_path = os.path.join(args.modelDir, 'lm', args.dataset)
    args.styler_path = os.path.join(args.modelDir, 'styler')
    # update batch size if using parallel training
    if 'para' in args.dataset:
        args.batch_size = int(args.batch_size / 2)
    # update output path
    if not args.logDir:
        # if not in philly enviroment
        args.logDir = 'logs'
    args.logDir = os.path.join(args.logDir, args.network, args.suffix)
    log_dir = Path(args.logDir)
    if not log_dir.exists():
        print('=> creating {}'.format(log_dir))
        log_dir.mkdir(parents=True)
    time_str = time.strftime('%Y-%m-%d-%H-%M')
    log_file = '{}_{}_{}.log'.format(args.network, args.suffix, time_str)
    # update the suffix for tensorboard file name
    args.suffix = '{}_{}_{}'.format(args.network, args.suffix, time_str)
    final_log_file = log_dir / log_file
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(filename=str(final_log_file), format=head)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console = logging.StreamHandler()
    logging.getLogger('').addHandler(console)
    logger.info('------------------------------------------------')
    logger.info(pprint.pformat(args))
    logger.info('------------------------------------------------')
    return args
# Run-mode flags, hard-coded for this run.
save_frames = False
save_model = False
load_model = True
# Echo the algorithm variants chosen earlier in the script.
print('prioritized_replay: ' + str(prioritized_replay))
print('soft_target_update: ' + str(soft_target_update))
print('dueling_dqn: ' + str(dueling_dqn))
print('noisy_network: ' + str(noisy_network))
# ==================================================================================================#
#                                          PATHS SETUP                                              #
# ==================================================================================================#
run_name = args.run_name
if cluster:
    # Cluster run: patient scans, the pretrained model and all outputs live
    # on the mounted 'data1' volume; outputs go to the Polyaxon output path.
    data_paths = get_data_paths()
    patient_path = data_paths[
        'data1'] + "/HHase_Robotic_RL/NAS_Sacrum_Scans/Patient_files/"
    patient_data_path = data_paths[
        'data1'] + "/HHase_Robotic_RL/NAS_Sacrum_Scans/"
    load_model_path = data_paths[
        'data1'] + "/HHase_Robotic_RL/Models/model_best.pth"
    output_path = get_outputs_path()
    tensorboard_path = get_outputs_path()
    experiment = Experiment()
else:
    # Local run: everything is resolved relative to the repo's Data folder.
    patient_path = "./../Data/Patient_files/"
    patient_data_path = "./../Data/"
    output_path = './'
    tensorboard_path = './runs/'
    load_model_path = "./../Data/pretrained_model/model_best.pth"
+ "_" + hparams.dataset.split(".")[-1] + "_" + hparams.outer_model.replace(".", "_") ) print(f'This will run on polyaxon: {str(hparams.on_polyaxon)}') # hparams.device = torch.device('cuda' if hparams.on_polyaxon else 'cpu') hparams.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print('torch.cuda.is_available(): ', torch.cuda.is_available()) print('device: ', hparams.device) if hparams.on_polyaxon: from polyaxon_client.tracking import Experiment, get_data_paths, get_outputs_path hparams.data_root = get_data_paths()['data1'] + polyaxon_folder hparams.output_path = get_outputs_path() poly_experiment_info = Experiment.get_experiment_info() poly_experiment_nr = poly_experiment_info['experiment_name'].split(".")[-1] hparams.name = poly_experiment_nr + "_" + exp_name print(f'get_outputs_path: {get_outputs_path()} \n ' f'experiment_info: {poly_experiment_info} \n experiment_name: {poly_experiment_nr}') else: date_str = datetime.now().strftime("%y%m%d-%H%M%S_") hparams.name = 'local_' + date_str + exp_name # hparams.output_path = Path(hparams.output_path).absolute() / hparams.name wandb_logger = WandbLogger(name=hparams.name, project=f"aortaSegm-{hparams.outer_model.split('.')[-1]}-{hparams.inner_module.split('.')[-1]}") # wandb.init(project=f"aortaSegm-{hparams.outer_model.split('.')[-1]}-{hparams.inner_module.split('.')[-1]}")