def create_dataset(self):
    # create train and val dataloader
    for phase, dataset_opt in self.opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt, split=phase)
            train_size = int(math.ceil(len(train_set) / dataset_opt['batch_size']))
            self.logger.info('Number of train images: {:,d}, iters per epoch: {:,d}'.format(
                len(train_set), train_size))
            total_iters = int(self.opt['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            self.logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                total_epochs, total_iters))
            self.train_loader = create_dataloader(train_set, dataset_opt)
        elif phase == 'val':
            val_set = create_dataset(dataset_opt, split=phase)
            self.val_loader = create_dataloader(val_set, dataset_opt)
            self.logger.info('Number of val images in [{:s}]: {:d}'.format(
                dataset_opt['name'], len(val_set)))
        elif phase == 'mix':
            mix_set = create_dataset(dataset_opt, split=phase)
            self.mix_loader = create_dataloader(mix_set, dataset_opt)
            self.logger.info('Number of mix images in [{:s}]: {:d}'.format(
                dataset_opt['name'], len(mix_set)))
        else:
            raise NotImplementedError('Phase [{:s}] is not recognized.'.format(phase))
    assert self.train_loader is not None
    # assert self.val_loader is not None
    self.total_epochs = total_epochs
    self.total_iters = total_iters
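# A minimal sketch of how the loaders and counters built above might drive an
# iteration-based training loop; train_one_batch is hypothetical, not part of
# the original code.
def train(self):
    current_iter = 0
    for epoch in range(self.total_epochs):
        for batch in self.train_loader:
            current_iter += 1
            if current_iter > self.total_iters:
                return
            # self.train_one_batch(batch)  # hypothetical per-batch update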
def create_dataset(self):
    # Create train/val/test dataloaders, plus three auxiliary train loaders
    # that are cycled alongside the main one.
    for phase, dataset_opt in self.opt['datasets'].items():
        if phase == 'train':
            self.train_set = create_dataset(dataset_opt, split=phase)
            dataset_opt['resample'] = self.resample
            # Auxiliary loaders use 1/8 of the main batch size (at least 1).
            dataset_opt['batch_size'] = max(dataset_opt['batch_size'] // 8, 1)
            aux_set1 = create_dataset(dataset_opt, split=phase)
            aux_set2 = create_dataset(dataset_opt, split=phase)
            aux_set3 = create_dataset(dataset_opt, split=phase)
            # self.train_set = ConcatDataset(
            #     train_set,
            #     aux_set1,
            #     aux_set2,
            #     aux_set3,
            # )
            train_size = int(math.ceil(len(self.train_set) / dataset_opt['batch_size']))
            self.logger.info('rank {}, Number of train images: {:,d}, iters per epoch: {:,d}'.format(
                self.rank, len(self.train_set), train_size))
            total_iters = int(self.opt['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            self.logger.info('rank {}, Total epochs needed: {:d} for iters {:,d}'.format(
                self.rank, total_epochs, total_iters))
            self.train_loader = create_dataloader(self.train_set, dataset_opt)
            aux_loader1 = create_dataloader(aux_set1, dataset_opt)
            aux_loader2 = create_dataloader(aux_set2, dataset_opt)
            aux_loader3 = create_dataloader(aux_set3, dataset_opt)
            self.train_iter = iter(self._cycle(self.train_loader))
            aux_iter1 = iter(self._cycle(aux_loader1))
            aux_iter2 = iter(self._cycle(aux_loader2))
            aux_iter3 = iter(self._cycle(aux_loader3))
            self.iters = [self.train_iter, aux_iter1, aux_iter2, aux_iter3]
        elif phase == 'val':
            val_set = create_dataset(dataset_opt, split=phase)
            self.val_loader = create_dataloader(val_set, dataset_opt)
            self.logger.info('rank {}, Number of val images in [{:s}]: {:d}'.format(
                self.rank, dataset_opt['name'], len(val_set)))
        elif phase == 'test':
            test_set = create_dataset(dataset_opt, split=phase)
            self.test_loader = create_dataloader(test_set, dataset_opt)
            self.logger.info('rank {}, Number of test images in [{:s}]: {:d}'.format(
                self.rank, dataset_opt['name'], len(test_set)))
        else:
            raise NotImplementedError('Phase [{:s}] is not recognized.'.format(phase))
    assert self.train_loader is not None
    # assert self.val_loader is not None
    self.total_epochs = total_epochs
    self.total_iters = total_iters
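# A minimal sketch of the _cycle helper referenced above (its real definition
# lives elsewhere in the class): an infinite generator that restarts the
# DataLoader each time it is exhausted, so training can be driven by a global
# step count instead of epoch boundaries.
def _cycle(self, loader):
    while True:
        for batch in loader:
            yield batch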
                momentum=0.9)
loss_func = nn.CrossEntropyLoss().cuda()
opt = {
    'name': 'RSSCN7',
    'lmdb': True,
    'resample': False,
    'dataroot': '../../data/RSSCN7/',
    'mode': 'file',
    'batch_size': 64,
    'use_shuffle': True,
    'n_workers': 0,
    'num_classes': 7,
}
train_set = create_dataset(opt, train=True)
train_loader = create_dataloader(train_set, opt)

# pre_optimizer = optim.SGD(
#     [{'params': [param for name, param in network.named_parameters()
#                  if 'fc' not in name]},
#      {'params': network.fc.parameters(), 'lr': 1}],
#     lr=1, momentum=0.9)
#
# new_dict = pre_optimizer.state_dict()
# self_dict = optimizer.state_dict()
# self_dict['param_groups'][0].update(new_dict['param_groups'][0])
# # self_dict.update(new_dict)
# optimizer.load_state_dict(self_dict)
# optimizer_dict = weights_replace(optimizer.state_dict(), pre_optimizer.state_dict())

# schedulers = []
# schedulers.append(lr_scheduler.MultiStepLR(optimizer, [30, 80], 0.1))
multistep = [30, 80]
# Decay the LR by 10x at each milestone: factor = 0.1 ** (milestones passed).
lambda1 = lambda step: 0.1**sum([step >= mst for mst in multistep])
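# A minimal sketch of attaching lambda1 as a scheduler, assuming `optimizer`
# is the SGD optimizer whose construction is truncated above; the effect
# matches the commented-out MultiStepLR(optimizer, [30, 80], 0.1).
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
for epoch in range(100):
    # train_one_epoch(...)  # hypothetical per-epoch training step
    scheduler.step()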
def do_work(item):
    # Write one predicted depth map to disk.
    depth, path = item
    imageio.imwrite(path, depth)


def worker(q):
    # Consume (depth, path) items until a None sentinel arrives.
    while True:
        item = q.get()
        if item is None:
            break
        do_work(item)


if __name__ == '__main__':
    opt = TestOptions().parse()
    torch.cuda.set_device(opt.gpu_ids[0])
    dataset = create_dataset(opt)
    dataset_size = len(dataset)
    print('The number of test images = %d' % dataset_size)
    model = create_model(opt)  # create a model given opt.model and other options
    model.setup()
    a2b_path = os.path.join(opt.img_dir, opt.name, opt.phase, 'A2B', 'depth')
    util.mkdirs(a2b_path)
    # util.mkdirs(os.path.join(path, model_name, phase, 'B2A', 'depth'))
    processes = []
    for i in range(n_processes):
        p = multiprocessing.Process(target=worker, args=(q,))
        processes.append(p)
        p.start()
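# A minimal sketch of the producer/shutdown side this worker pool implies,
# assuming `q` is a multiprocessing.Queue created before the workers start
# (it is referenced above but its definition is not shown). Each result is
# enqueued as a (depth, path) pair; one None sentinel per worker lets every
# worker() loop exit before join().
for depth, path in results:  # `results` is a hypothetical iterable of predictions
    q.put((depth, path))
for _ in processes:
    q.put(None)
for p in processes:
    p.join()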
def main(config_path: str):
    config = process_config(config_path)
    train_dataset, test_dataset = create_dataset(config)
    vgg11 = VGG11(config).compile_model()
    trainer = VGG11Trainer(vgg11, train_dataset, test_dataset, config)
    trainer.train()
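# A minimal sketch of a CLI entry point for main(); the --config flag name
# is an assumption, not part of the original snippet.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True, help='path to config file')
    main(parser.parse_args().config)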
import sys

import tensorflow as tf

import dataloader
from weighted_loss import edge_weighted_loss
# The architecture module is imported as `model` elsewhere in the original
# script (one of the architectures.* modules); its import is not shown here.

dataloader.NUM_INPUT_OBS = model.NUM_INPUT_OBS
dataloader.NUM_TEST_OBS = model.NUM_TEST_OBS

TEST = len(sys.argv) > 2 and sys.argv[2] == 'test'

print('Creating datasets')


def get_relevant(inp, label):
    # Keep only the observation/viewpoint inputs and the map label.
    inp_obs, inp_vp, obs = inp
    vp, map_label = label
    return (inp_obs, inp_vp), map_label


BATCH_SIZE = 16
if not TEST:
    train = dataloader.create_dataset('datasets*', batch_size=BATCH_SIZE).map(get_relevant)
dev = dataloader.create_dataset('dev', batch_size=BATCH_SIZE).map(get_relevant)

print('Creating models')
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)


@tf.function
def loss_fn(y_true, y_pred):
    return edge_weighted_loss(y_true, y_pred, weight=32)[1]


e2e_model = model.build_e2e_model()
if len(sys.argv) > 1:
    load = sys.argv[1]
    e2e_model.load_weights('checkpoints/{}/e2e_model_{}.ckpt'.format(
        model.CHECKPOINT_PATH, load))
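# A minimal sketch of wiring these pieces together with Keras; the epoch
# count is an assumption, not from the original script.
if not TEST:
    e2e_model.compile(optimizer=optimizer, loss=loss_fn)
    e2e_model.fit(train, validation_data=dev, epochs=10)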
import sys

import tensorflow as tf

import dataloader
import architectures.recurrent_fc as model
from weighted_loss import edge_weighted_loss

dataloader.NUM_INPUT_OBS = model.NUM_INPUT_OBS
dataloader.NUM_TEST_OBS = model.NUM_TEST_OBS

BATCH_SIZE = 16

print('Creating datasets')
train_data = dataloader.create_dataset('datasets*', batch_size=BATCH_SIZE)
dev_data = dataloader.create_dataset('dev', batch_size=BATCH_SIZE)

print('Creating models')
representation_net = model.representation_network(True)
mapping_net = model.mapping_network()

load = 0
if len(sys.argv) > 1:
    load = int(sys.argv[1])
    representation_net.load_weights('checkpoints/{}/repnet_{}.cpkt'.format(
        model.CHECKPOINT_PATH, load))
    mapping_net.load_weights('checkpoints/{}/mapnet_{}.cpkt'.format(
        model.CHECKPOINT_PATH, load))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

print('Training')
EPOCHS = 10
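# A minimal sketch of a custom training step for the two networks, assuming
# each batch yields ((inp_obs, inp_vp), map_label) as in the sibling e2e
# script; the exact call signatures of representation_net and mapping_net
# are assumptions.
@tf.function
def train_step(inp_obs, inp_vp, map_label):
    with tf.GradientTape() as tape:
        rep = representation_net((inp_obs, inp_vp), training=True)
        pred = mapping_net(rep, training=True)
        loss = edge_weighted_loss(map_label, pred, weight=32)[1]
    variables = representation_net.trainable_variables + mapping_net.trainable_variables
    grads = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return loss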
def create_dataset(self):
    # Create train/val/test dataloaders, optionally varying the train set by
    # cross-validation fold, bootstrap resampling, or a held-out val split.
    for phase, dataset_opt in self.opt['datasets'].items():
        if phase == 'train':
            if self.opt['varyOnCV']:
                train_set = create_dataset(dataset_opt, split=phase)
                nfold = self.num_tasks  # nfold = 5
                folds = cv_split(train_set, nfold, self.comm)
                self.folds_loaders = [create_dataloader(f, dataset_opt) for f in folds]
                self.train_set = folds.pop(dataset_opt['fold'] - 1)
                self.logger.info('split into {} folds, currently in fold {}'.format(
                    nfold, dataset_opt['fold']))
                # self.val_set = val_fold
                if self.opt['varyOnSample']:
                    self.train_set = ResampleDataset(self.train_set)
            else:
                self.train_set = create_dataset(dataset_opt, split=phase)
                # self.opt['varyOnSample'] = True
                if self.opt['varyOnSample']:
                    self.train_set = ResampleDataset(self.train_set)
            # self.opt['create_val'] = True
            if self.opt['create_val']:
                # task0 for 0.1, else for random in [0, 0.3]
                ratio = 0.1
                # if self.task_id == 0:
                #     ratio = 0.1
                # else:
                #     ratio = np.random.choice([0.1, 0.2, 0.3])
                self.train_set, val_set = train_val_split(self.train_set, ratio,
                                                          comm=None)  # self.comm
                # val_folds = self.comm.allgather(val_set)
                # self.logger.info([vf[0] for vf in val_folds])  # test if val_folds in all ranks are the same
                # self.folds_loaders = [create_dataloader(f, dataset_opt) for f in val_folds]
                self.val_loader = create_dataloader(val_set, dataset_opt)
                self.logger.info('rank {}, Number of val images in [{:s}]: {:d}'.format(
                    self.rank, dataset_opt['name'], len(val_set)))
            # self.opt['varyOnSample'] = True
            self.train_loader = create_dataloader(self.train_set, dataset_opt)
            self.train_iter = iter(self._cycle(self.train_loader))
            train_size = int(math.ceil(len(self.train_set) / dataset_opt['batch_size']))
            self.logger.info('rank {}, Number of train images: {:,d}, iters per epoch: {:,d}'.format(
                self.rank, len(self.train_set), train_size))
            total_iters = int(self.opt['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            self.logger.info('rank {}, Total epochs needed: {:d} for iters {:,d}'.format(
                self.rank, total_epochs, total_iters))
            self.total_epochs = total_epochs
            self.total_iters = total_iters
        elif phase == 'val' and not self.opt['varyOnCV'] and not self.opt['create_val']:
            val_set = create_dataset(dataset_opt, split=phase)
            self.val_loader = create_dataloader(val_set, dataset_opt)
            self.logger.info('rank {}, Number of val images in [{:s}]: {:d}'.format(
                self.rank, dataset_opt['name'], len(val_set)))
        elif phase == 'test':
            test_set = create_dataset(dataset_opt, split=phase)
            self.test_loader = create_dataloader(test_set, dataset_opt)
            self.logger.info('rank {}, Number of test images in [{:s}]: {:d}'.format(
                self.rank, dataset_opt['name'], len(test_set)))
        else:
            raise NotImplementedError('Phase [{:s}] is not recognized.'.format(phase))
    assert self.train_loader is not None
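# A minimal sketch of what train_val_split might do (the real helper is
# defined elsewhere in the repo); the names and the ignored comm argument
# are assumptions.
import math

import torch
from torch.utils.data import random_split


def train_val_split(dataset, ratio, comm=None):
    # Hold out `ratio` of the samples for validation; a fixed generator
    # keeps the split identical across ranks when comm is unused.
    n_val = int(math.floor(len(dataset) * ratio))
    n_train = len(dataset) - n_val
    generator = torch.Generator().manual_seed(0)
    train_subset, val_subset = random_split(dataset, [n_train, n_val],
                                            generator=generator)
    return train_subset, val_subset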
assert (0)  # unconditional assert: execution halts here (debug guard)

print(args)
print(f'Total Embedding Dimension: {total_embedding_dim}')

param_dict = set_hyperparameter_dict(args)
param_dict['input_dim']['feature'] = feature_data[0][0].shape[0]
param_dict['input_dim']['liwc'], = liwc_embedding[0][0].shape
param_dict['input_dim']['vader'], = vader_embedding[0][0].shape
param_dict['input_dim']['liwc_leaves'] = (liwc_leaves[0][0].shape[0]
                                          * liwc_leaves[0][0].shape[1])
param_dict['input_dim']['vader_leaves'] = (vader_leaves[0][0].shape[0]
                                           * vader_leaves[0][0].shape[1])
param_dict['input_dim']['electra'] = electra_embedding[0][0].shape[1]

print('\nParameters:')
pp.pprint(param_dict)

# Get Dataset and DataLoader
train_dataset, valid_dataset, test_dataset = create_dataset(
    (feature_data, liwc_embedding, vader_embedding, electra_embedding,
     liwc_leaves, vader_leaves, labels),
)

if args.test:
    get_embeddings(train_dataset, valid_dataset, test_dataset, param_dict)
else:
    pred_auc, prob_auc, max_model = run_train(args.seed, train_dataset,
                                              valid_dataset, test_dataset,
                                              param_dict)
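# The trailing commas in the shape assignments above are single-element tuple
# unpacking: for a 1-D array, `x, = arr.shape` binds x to the only dimension
# and raises if the array is not 1-D. A tiny self-contained demonstration:
import numpy as np

dim, = np.zeros(300).shape
assert dim == 300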