class Logger(object):
    def __init__(self, opt):
        self.exp_name = opt['name']
        self.use_tb_logger = opt['use_tb_logger']
        self.opt = opt['logger']
        self.log_dir = opt['path']['log']
        # loss log file
        self.loss_log_path = os.path.join(self.log_dir, 'loss_log.txt')
        with open(self.loss_log_path, 'a') as log_file:
            log_file.write('=============== Time: ' + get_timestamp() + ' =============\n')
            log_file.write('================ Training Losses ================\n')
        # val results log file
        self.val_log_path = os.path.join(self.log_dir, 'val_log.txt')
        with open(self.val_log_path, 'a') as log_file:
            log_file.write('================ Time: ' + get_timestamp() + ' ===============\n')
            log_file.write('================ Validation Results ================\n')
        if self.use_tb_logger and 'debug' not in self.exp_name:
            from tensorboard_logger import Logger as TensorboardLogger
            self.tb_logger = TensorboardLogger('../tb_logger/' + self.exp_name)

    def print_format_results(self, mode, rlt):
        epoch = rlt.pop('epoch')
        iters = rlt.pop('iters')
        time = rlt.pop('time')
        model = rlt.pop('model')
        if 'lr' in rlt:
            lr = rlt.pop('lr')
            message = '<epoch:{:3d}, iter:{:8,d}, time:{:.2f}, lr:{:.1e}> '.format(
                epoch, iters, time, lr)
        else:
            message = '<epoch:{:3d}, iter:{:8,d}, time:{:.2f}> '.format(
                epoch, iters, time)
        for label, value in rlt.items():
            if mode == 'train':
                message += '{:s}: {:.2e} '.format(label, value)
            elif mode == 'val':
                message += '{:s}: {:.4e} '.format(label, value)
            # tensorboard logger
            if self.use_tb_logger and 'debug' not in self.exp_name:
                self.tb_logger.log_value(label, value, iters)
        # print in console
        print(message)
        # write in log file
        if mode == 'train':
            with open(self.loss_log_path, 'a') as log_file:
                log_file.write(message + '\n')
        elif mode == 'val':
            with open(self.val_log_path, 'a') as log_file:
                log_file.write(message + '\n')

    def log_message(self, rlt):
        iters = rlt.pop('iters')
        for label, value in rlt.items():
            self.tb_logger.log_value(label, value, iters)
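# Usage sketch for the Logger above (a hypothetical `opt` dict assembled from
# the exact keys __init__ reads; assumes `os` and a `get_timestamp()` helper
# are available, as the class itself does).
opt = {
    'name': 'example_experiment',
    'use_tb_logger': False,           # True additionally requires tensorboard_logger
    'logger': {'print_freq': 100},    # contents are only stored, never read here
    'path': {'log': './logs'},        # must exist; __init__ appends to files in it
}
logger = Logger(opt)
logger.print_format_results('train', {
    'epoch': 1, 'iters': 100, 'time': 0.25, 'model': 'example', 'lr': 1e-4,
    'l_pix': 1.23e-2,
})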
def trainer():
    # 1. Load dataset
    dataset_train = dataset_provider(config['dataset_name'], config['dataset_root'], is_train=True)
    dataloader_train = DataLoader(dataset_train, config['batch_size'], shuffle=True,
                                  num_workers=config['num_workers'])
    dataset_eval = dataset_provider(config['dataset_name'], config['dataset_root'], is_train=False)
    dataloader_eval = DataLoader(dataset_eval, config['batch_size'], shuffle=False,
                                 num_workers=config['num_workers'])

    # 2. Build model
    net = model_provider(config['model'], **config['model_param']).cuda(config['device_ids'][0])
    net = nn.DataParallel(net, device_ids=config['device_ids'])

    # 3. Criterion
    criterion = nn.CrossEntropyLoss().cuda(config['device_ids'][0])

    # 4. Optimizer
    optimizer = config['optimizer'](net.parameters(), **config['optimizer_param'])
    scheduler = config['scheduler'](optimizer, **config['scheduler_param']) if config['scheduler'] else None

    # 5. Tensorboard logger
    logger_train = Logger('logs/train')
    logger_eval = Logger('logs/eval')

    # 6. Train loop
    for epoch in range(config['num_epoch']):
        # train
        print('---------------------- Train ----------------------')
        train_op(net, dataloader_train, criterion, optimizer, epoch, logger_train)
        # evaluation
        if epoch % config['eval_per_epoch'] == config['eval_per_epoch'] - 1:
            print('---------------------- Evaluation ----------------------')
            eval_op(net, dataloader_eval, criterion, epoch, logger_eval)
        # save weights
        torch.save(net.state_dict(),
                   'weights/{}/{}_{}.newest.pkl'.format(config['dataset_name'], config['model'], time_id))
        # scheduler
        if scheduler is not None:
            scheduler.step()
def trainer(index=0):
    # 1. Load dataset
    dataset_train = dataset_provider(config['dataset_name'], config['dataset_root'],
                                     is_train=True, fold_idx=index)
    dataloader_train = DataLoader(dataset_train, config['batch_size'], shuffle=True,
                                  num_workers=config['num_workers'])
    dataset_eval = dataset_provider(config['dataset_name'], config['dataset_root'],
                                    is_train=False, fold_idx=index)
    dataloader_eval = DataLoader(dataset_eval, 10, shuffle=False,
                                 num_workers=config['num_workers'])

    # 2. Build model
    net = model_provider(config['model'], img_chn=config['image_channels'],
                         n_cls=config['num_class']).cuda(config['device_ids'][0])
    net = nn.DataParallel(net, device_ids=config['device_ids'])

    # 3. Criterion
    criterion = criterion_provider(config['criterion']).cuda(config['device_ids'][0])

    # 4. Optimizer
    optimizer = config['optimizer'](net.parameters(), lr=config['lr'])
    scheduler = None
    if config['lr_scheduler']:
        step_size = len(dataloader_train) * config['num_epoch'] // (4 * 21) + 1
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.9)

    # 5. Evaluation
    eval_op = eval_op_provider(config['eval_op_name'])

    # 6. Tensorboard logger
    logger_train = Logger('logs/{}/{}/fold_{}/train'.format(config['model'], time_id, index))
    logger_eval = Logger('logs/{}/{}/fold_{}/eval'.format(config['model'], time_id, index))

    # 7. Train loop
    dice_max = -1
    for epoch in range(config['num_epoch']):
        # train
        print('---------------------- Train ----------------------')
        train_op(net, dataloader_train, criterion, optimizer, scheduler, epoch, logger_train)
        # evaluation every 10 epochs
        if epoch % 10 == 9:
            print('---------------------- Evaluation ----------------------')
            dice, IoU, sensitivity, specificity = eval_op(
                net, dataloader_eval, config['device_ids'][0], criterion,
                config['num_class'], epoch, logger_eval, config['log_image'])
            # save weights
            torch.save(net.state_dict(),
                       'weights/{}/{}.newest.{}.pkl'.format(time_id, config['model'], index))
            if dice >= dice_max:
                dice_max = dice
                torch.save(net.state_dict(),
                           'weights/{}/{}.best.{}.pkl'.format(time_id, config['model'], index))
    # note: assumes at least one evaluation ran (i.e. num_epoch >= 10)
    return dice, IoU, sensitivity, specificity
class Visualizer():
    def __init__(self, log_dir='runs/', **kwargs):
        self.tenbd = Logger(log_dir, flush_secs=10)
        self.index = {}
        self.log_text = ''

    def plot(self, name, y):
        x = self.index.get(name, 0)
        self.tenbd.log_value(name, y, x)
        self.index[name] = x + 1

    def plotMany(self, data):
        for k, v in data.items():  # was .iteritems(), which is Python 2 only
            self.plot(k, v)
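# Usage sketch for the Visualizer above (assumes
# `from tensorboard_logger import Logger`). plot() keeps a per-series step
# counter, so repeated calls advance the x-axis automatically.
vis = Visualizer(log_dir='runs/demo')
for step in range(5):
    vis.plot('loss', 1.0 / (step + 1))
vis.plotMany({'acc': 0.9, 'lr': 1e-3})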
def _save(self, checkpoint_dir):
    file_path = checkpoint_dir + '/ray_SNN_W_epoch_' + str(self.epoch) + '.pickle'
    # self.model.save_weights(file_path)
    if self.epoch % 20 == 0:
        with open(file_path, 'wb') as fw:  # pickle requires a binary-mode file, not 'w'
            save = {'w_h': self.model.w_h, 'w_o': self.model.w_o}
            pickle.dump(save, fw)
    # writer = SummaryWriter(checkpoint_dir)
    logger = Logger(checkpoint_dir + '/images/')
    # logger_hist = Logger(checkpoint_dir + '/histograms/')
    # img = np.random.rand(10, 10)
    images = []
    # mx = np.max(self.w_h[0])
    # mn = np.min(self.w_h[0])
    # Pad each output weight column into the smallest enclosing square and
    # render it as an image tile.
    for tp in range(self.model.outputs):
        tpp = self.model.w_o[:, tp]
        sz = np.ceil(np.sqrt(np.size(tpp))).astype(int)
        tpm = np.zeros(sz * sz, dtype=float)
        tpm[0:len(tpp)] = tpp
        tpp = np.reshape(tpm, newshape=[sz, sz])
        images.append(scale(tpp))
    cimages = combine_matrix(*images)
    logger.log_images('key_out', [cimages], step=self.epoch)
    logger.log_histogram('key_out', [self.model.w_o], step=self.epoch)
    # Same visualization for each hidden layer's weights.
    for k in range(len(self.h)):
        images = []
        for tp in range(self.h[k]):
            tpp = self.model.w_h[k][:, tp]
            sz = np.ceil(np.sqrt(np.size(tpp))).astype(int)
            tpm = np.zeros(sz * sz, dtype=float)
            tpm[0:np.size(tpp)] = tpp
            tpp = np.reshape(tpm, newshape=[sz, sz])
            images.append(scale(tpp))
        cimages = combine_matrix(*images)
        logger.log_images('key_' + str(k), [cimages], step=self.epoch)
        logger.log_histogram('key' + str(k), [self.model.w_h[k]], step=self.epoch)
    return file_path
def main(args):
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    logger = None
    if args.tensorboard:
        # One should create a folder for storing logs
        loggin_dir = os.path.join(outputdir, 'runs', 'pretraining')
        if not os.path.exists(loggin_dir):
            os.makedirs(loggin_dir)
        loggin_dir = os.path.join(loggin_dir, '%s' % (args.id))
        if args.clean_log:
            remove_files_in_dir(loggin_dir)
        logger = Logger(loggin_dir)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)

    nepoch = int(np.ceil(np.array(args.niter * args.batchsize, dtype=float) / len(trainset)))
    step = int(np.ceil(np.array(args.step * args.batchsize, dtype=float) / len(trainset)))

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batchsize, shuffle=True, **kwargs)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=True, **kwargs)

    return pretrain(args, outputdir,
                    {'nlayers': 4, 'dropout': 0.2, 'reluslope': 0.0,
                     'nepoch': nepoch, 'lrate': [args.lr], 'wdecay': [0.0], 'step': step},
                    use_cuda, trainloader, testloader, logger)
class Visualizer():
    '''
    Wraps visdom / tensorboard_logger to make loss logging more convenient.
    '''

    def __init__(self, env='default', log_dir='runs/BiGRU', **kwargs):
        # self.vis = visdom.Visdom(env=env, **kwargs)
        self.tenbd = Logger(log_dir, flush_secs=2)
        # x-coordinate (step) of each plotted series, e.g. {'img': 2, 'loss': 12}
        self.index = {}
        # accumulated log text
        self.log_text = ''

    # def reinit(self, env='default', **kwargs):
    #     '''
    #     Reconfigure visdom.
    #     '''
    #     self.vis = visdom.Visdom(env=env, **kwargs)
    #     return vis

    def plot(self, name, y):
        '''
        self.plot('loss', 0.23)
        '''
        x = self.index.get(name, 0)
        # self.vis.line(Y=np.array([y]),
        #               X=np.array([x]),
        #               win=name,
        #               opts=dict(title=name),
        #               update=None if x == 0 else 'append')
        self.tenbd.log_value(name, y, x)
        self.index[name] = x + 1

    def plotMany(self, data):
        '''
        Plot several values at once.
        '''
        for k, v in data.items():  # was .iteritems(), which is Python 2 only
            self.plot(k, v)

    def log(self, info, win='log_text'):
        '''
        Record a piece of log text.
        '''
        # The body was truncated in the source; a minimal completion that
        # appends the message to the accumulated log text.
        self.log_text += '{} <br>'.format(info)
def train_mlp_epoch(epoch, args, rnn, output, data_loader,
                    optimizer_mlp, optimizer_output,
                    scheduler_mlp, scheduler_output):
    rnn.train()
    output.train()
    loss_sum = 0
    for batch_idx, data in enumerate(data_loader):
        rnn.zero_grad()
        output.zero_grad()
        x_unsorted = data['x'].float()
        y_unsorted = data['y'].float()
        y_len_unsorted = data['len']
        y_len_max = max(y_len_unsorted)
        x_unsorted = x_unsorted[:, 0:y_len_max, :]
        y_unsorted = y_unsorted[:, 0:y_len_max, :]

        # Initialize gru hidden state according to batch size
        rnn.hidden = rnn.init_hidden(batch_size=x_unsorted.size(0))

        # Sort input by sequence length (descending), as required for packing
        y_len, sort_index = torch.sort(y_len_unsorted, 0, descending=True)
        y_len = y_len.numpy().tolist()
        x = torch.index_select(x_unsorted, 0, sort_index)
        y = torch.index_select(y_unsorted, 0, sort_index)
        x = Variable(x)
        y = Variable(y)

        h = rnn(x, pack=True, input_len=y_len)
        y_pred = output(h)
        y_pred = torch.sigmoid(y_pred)
        y_pred = pack_padded_sequence(y_pred, y_len, batch_first=True)
        y_pred = pad_packed_sequence(y_pred, batch_first=True)[0]

        # Use cross entropy loss
        loss = binary_cross_entropy_weight(y_pred, y)
        loss.backward()

        # Update output head and gru
        optimizer_output.step()
        optimizer_mlp.step()
        scheduler_output.step()
        scheduler_mlp.step()

        # Output only the first batch's statistics for each epoch
        if batch_idx == 0:
            print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, num_layer: {}, hidden: {}'.format(
                epoch, args.epochs, loss.item(), args.graph_type, args.num_layers, args.hidden_size_rnn))

        # Logging. The source called the Logger constructor here, which only
        # builds an object and discards it; tensorboard_logger's module-level
        # log_value (from tensorboard_logger import log_value) is presumably
        # what was intended.
        log_value('loss_' + args.fname, loss.item(), epoch * args.batch_ratio + batch_idx)

        # Update the loss sum
        loss_sum += loss.item()
    return loss_sum / (batch_idx + 1)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_save_path", "-s", type=str, default="./saved_models",
                        help="path to save the model")
    parser.add_argument("--data_path", "-d", type=str, default="./",
                        help="path to computed mfcc numpy files")
    parser.add_argument("--log_path", "-l", type=str, default="./summary/",
                        help="path to log dir")
    parser.add_argument("--batch_size", type=int, default=1000, help="batch size")
    parser.add_argument("--epochs", type=int, default=500, help="number of training epochs")
    parser.add_argument("--epochs_per_save", type=int, default=10,
                        help="number of epochs after which to save model and training checkpoint")
    parser.add_argument("--noise", type=float, default=0.0,
                        help="with what probability to add noise to augment training")
    args = parser.parse_args()

    BATCH_SIZE = args.batch_size
    epochs = args.epochs
    epochs_per_save = args.epochs_per_save

    # Create the model save directory if it does not exist
    curr_dir = os.getcwd()
    try:
        os.chdir(args.model_save_path)
    except OSError:
        os.mkdir(args.model_save_path)
    os.chdir(curr_dir)

    data_path = args.data_path
    dataset = AudioMFCCDataset(aud_mfcc_file=os.path.join(data_path, "train_aud_mfcc_norm.npy"),
                               aud_mfcc_lengths=os.path.join(data_path, "train_sample_lengths_norm.npy"),
                               data_size=None)
    print("loaded dataset")
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, collate_fn=AudioMFCCDataset.pack_batch)
    print("created iter on loaded data")
    TOTAL_OBS = dataset.__len__()

    # dev set not std trimmed, to check if longer audio is also getting properly encoded
    dev_dataset = AudioMFCCDataset(aud_mfcc_file=os.path.join(data_path, "dev_aud_mfcc_norm.npy"),
                                   aud_mfcc_lengths=os.path.join(data_path, "dev_sample_lengths_norm.npy"),
                                   data_size=None)
    print("loaded dev dataset")
    dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, collate_fn=AudioMFCCDataset.pack_batch)
    print("created iter on loaded dev data")

    # logging information
    now = datetime.datetime.now()
    logger = Logger(args.log_path + now.strftime("%Y-%m-%d_%H_%M"), flush_secs=5)

    aud2vec = Seq2SeqAutoencoder(dataset.num_features(), noise_prob=0.0)
    device = torch.device(DEVICE_ID if torch.cuda.is_available() else "cpu")
    gpu_aud2vec = aud2vec.to(device)
    print("model created")
    print("training begins")
    losses = gpu_aud2vec.train(loader, dev_loader, epochs, epochs_per_save,
                               args.model_save_path, logger)
    torch.save(aud2vec.state_dict(), os.path.join(args.model_save_path, "aud2vec.pth"))
def test_real_histo_data(tmpdir):
    logger = Logger(str(tmpdir), flush_secs=0.1)
    logger.log_histogram('hist2', [1, 7, 6, 9, 8, 1, 4, 5, 3, 7], step=1)
    logger.log_histogram('hist2', [5, 3, 2, 0, 8, 5, 7, 7, 7, 2], step=2)
    logger.log_histogram('hist2', [1, 2, 2, 1, 5, 1, 8, 4, 4, 1], step=3)
    tf_log, = glob.glob(str(tmpdir) + '/*')
    assert os.path.basename(tf_log).startswith('events.out.tfevents.')
def test_smoke_logger(tmpdir):
    logger = Logger(str(tmpdir), flush_secs=0.1)
    for step in range(10):
        logger.log_value('v1', step * 1.5, step)
        logger.log_value('v2', step ** 1.5 - 2)
    time.sleep(0.5)
    tf_log, = tmpdir.listdir()
    assert tf_log.basename.startswith('events.out.tfevents.')
class Logger:
    """
    Deals with writing tensorboard summaries and logging metric history
    to a pickle file.
    """

    def __init__(self, outdir):
        self.outdir = outdir
        self.tf_logger = TFLogger(os.path.join(outdir, 'run'), flush_secs=2)
        self.metric_history: Dict = defaultdict(list)

    def log_metrics(self, phase, metrics, global_step):
        """
        Logs scalar values as tf summaries. Don't bother with true_mean:
        it stays the same and doesn't really work as a graph.
        """
        for name, value in metrics.items():
            if name != "true_mean":
                self.tf_logger.log_value(f"{phase} {name}", value, global_step)
        # save a standard pickle object for easy matplotlib plots of performance over epochs
        self.metric_history[phase].append(metrics)
        with open(os.path.join(self.outdir, "metric_history.pkl"), "wb") as metric_file:
            pickle.dump(self.metric_history, metric_file)
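# Usage sketch for the metric-history Logger above (assumes
# `from tensorboard_logger import Logger as TFLogger` plus the os, pickle,
# typing.Dict and collections.defaultdict imports the class relies on).
logger = Logger(outdir='./output')
for step, loss in enumerate([0.9, 0.5, 0.3]):
    logger.log_metrics('train', {'loss': loss, 'true_mean': 0.0}, global_step=step)
# metric_history.pkl now holds {'train': [...]} for offline matplotlib plots.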
def test_dummy():
    logger = Logger(None, is_dummy=True)
    for step in range(3):
        logger.log_value('A v/1', step, step)
        logger.log_value('A v/2', step * 2, step)
    assert dict(logger.dummy_log) == {
        'A_v/1': [(0, 0), (1, 1), (2, 2)],
        'A_v/2': [(0, 0), (1, 2), (2, 4)],
    }
def test_real_image_data(tmpdir):
    logger = Logger(str(tmpdir), flush_secs=0.1)
    img = np.random.rand(10, 10)
    images = [img, img]
    logger.log_images('key', images, step=1)
    logger.log_images('key', images, step=2)
    logger.log_images('key', images, step=3)
    tf_log, = glob.glob(str(tmpdir) + '/*')
    assert os.path.basename(tf_log).startswith('events.out.tfevents.')
def test_dummy_images():
    logger = Logger(None, is_dummy=True)
    img = np.random.rand(10, 10)
    images = [img, img]
    logger.log_images('key', images, step=1)
    logger.log_images('key', images, step=2)
    logger.log_images('key', images, step=3)
    assert dict(logger.dummy_log) == {
        'key': [(1, images), (2, images), (3, images)]
    }
def test_dummy_histo():
    logger = Logger(None, is_dummy=True)
    bins = [0, 1, 2, 3]
    logger.log_histogram('key', (bins, [0.0, 1.0, 2.0]), step=1)
    logger.log_histogram('key', (bins, [1.0, 1.5, 2.5]), step=2)
    logger.log_histogram('key', (bins, [0.0, 1.0, 2.0]), step=3)
    assert dict(logger.dummy_log) == {
        'key': [(1, (bins, [0.0, 1.0, 2.0])),
                (2, (bins, [1.0, 1.5, 2.5])),
                (3, (bins, [0.0, 1.0, 2.0]))]
    }
def test_unique():
    logger = Logger(None, is_dummy=True)
    for step in range(1, 3):
        # names that normalize to the same valid name
        logger.log_value('A v/1', step, step)
        logger.log_value('A\tv/1', step * 2, step)
        logger.log_value('A  v/1', step * 3, step)
    assert dict(logger.dummy_log) == {
        'A_v/1': [(1, 1), (2, 2)],
        'A_v/1/1': [(1, 2), (2, 4)],
        'A_v/1/2': [(1, 3), (2, 6)],
    }
def test_real_histo_tuple(tmpdir):
    """
    from tests.test_tensorboard_logger import *
    import ubelt as ub
    ub.delete(ub.ensure_app_cache_dir('tf_logger'))
    tmpdir = ub.ensure_app_cache_dir('tf_logger/runs/run1')
    """
    logger = Logger(str(tmpdir), flush_secs=0.1)
    bins = [-.5, .5, 1.5, 2.5]
    logger.log_histogram('hist1', (bins, [0.0, 1.0, 2.0]), step=1)
    logger.log_histogram('hist1', (bins, [1.0, 1.5, 2.5]), step=2)
    logger.log_histogram('hist1', (bins, [0.0, 1.0, 2.0]), step=3)
    tf_log, = glob.glob(str(tmpdir) + '/*')
    assert os.path.basename(tf_log).startswith('events.out.tfevents.')
def test_serialization(tmpdir):
    logger = Logger(str(tmpdir), flush_secs=0.1, dummy_time=256.5)
    logger.log_value('v/1', 1.5, 1)
    logger.log_value('v/22', 16.0, 2)
    time.sleep(0.5)
    tf_log, = tmpdir.listdir()
    assert tf_log.read_binary() == (
        # step = 0, initial record
        b'\x18\x00\x00\x00\x00\x00\x00\x00\xa3\x7fK"\t\x00\x00\x00\x00\x00\x08p@\x1a\rbrain.Event:2\xbc\x98!+'
        # v/1
        b'\x19\x00\x00\x00\x00\x00\x00\x00\x8b\xf1\x08(\t\x00\x00\x00\x00\x00\x08p@\x10\x01*\x0c\n\n\n\x03v/1\x15\x00\x00\xc0?,\xec\xc0\x87'
        # v/22
        b'\x1a\x00\x00\x00\x00\x00\x00\x00\x12\x9b\xd8-\t\x00\x00\x00\x00\x00\x08p@\x10\x02*\r\n\x0b\n\x04v/22\x15\x00\x00\x80A\x8f\xa3\xb6\x88'
    )
def create_experiment_folder(folder='experiments', tag=None, args=None):
    if folder is None:
        folder = 'experiments'
    if os.path.exists(folder + '/to_delete'):
        shutil.rmtree(folder + '/to_delete')
    folder = folder.replace(' ', '_')
    if os.path.exists(folder):
        print(" - Folder for experiments found")
    else:
        print(" - Creating folder for experiments")
        os.makedirs(folder)

    # Load csv of experiments
    experiment_csv = '/'.join([folder, "experiments.csv"])
    if os.path.isfile(experiment_csv):
        print(" - Loading experiments.csv file")
        df = pd.read_csv(experiment_csv, index_col=0)
    else:
        print(" - experiments.csv not found, creating one")
        df = pd.DataFrame(columns=args.keys())
        df.to_csv(experiment_csv)

    # df = df.append(args, ignore_index=True)
    id = 0 if len(df.index) == 0 or pd.isna(df.index[-1]) else df.index.max() + 1
    # Index the new row with the computed id; the original used
    # df.index.max() + 1 here, which is NaN on an empty frame.
    df.loc[id] = pd.Series(args)
    df.to_csv(experiment_csv)

    # Creating folder for experiment
    if tag is None:
        experiment_folder = '/'.join([folder, str(df.index[-1])])
    else:
        experiment_folder = '/'.join([folder, str(df.index[-1]) + '_' + tag])
    os.makedirs(experiment_folder)
    logs_folder = experiment_folder + '/logs'
    logger = Logger(logs_folder + "/extra")
    del df
    return id, logger, logs_folder, experiment_csv, experiment_folder
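# Usage sketch for create_experiment_folder (assumes pandas as pd, os, shutil
# and `from tensorboard_logger import Logger` are imported). `args` is a plain
# dict of hyperparameters; each call appends a row to experiments.csv and
# creates a numbered folder for the new experiment.
run_id, logger, logs_folder, experiment_csv, experiment_folder = \
    create_experiment_folder(folder='experiments', tag='baseline',
                             args={'lr': 1e-3, 'batch_size': 32})
logger.log_value('loss', 0.5, 0)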
def main():
    global args
    args = parser.parse_args()
    model = define_model(is_resnet=False, is_densenet=False, is_senet=True)
    model_final = net.modelfinal()
    model = model.cuda()
    model_final = model_final.cuda()
    batch_size = 1
    train_loader = loaddata.getTrainingData(batch_size)
    optimizer = torch.optim.Adam(model_final.parameters(), args.lr,
                                 weight_decay=args.weight_decay)
    logger = Logger(logdir='experiment_cnn', flush_secs=1)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_loader, model, model_final, optimizer, epoch, logger)
        if epoch % 10 == 0:
            save_checkpoint({'state_dict': model.state_dict()},
                            filename='modelcheckpoint.pth.tar')
            save_checkpoint({'state_dict_final': model_final.state_dict()},
                            filename='finalmodelcheckpoint.pth.tar')
            print('save: (epoch: %d)' % (epoch + 1))
def __init__(self, log_dir, label, titles, append_steps=1):
    """
    log_dir      : str, directory where all the logs will be written.
    label        : str, root filename for the logs. It shouldn't contain an
                   extension, such as .txt.
    titles       : list, title for each log attribute.
    append_steps : int
    """
    self.log_dir = log_dir
    self.label = label
    self.titles = titles
    self.append_steps = append_steps
    self.logs = {}  # all title-log pairs that will be traced for this instance
    self.meters = {}
    for t in titles:
        self.logs[t] = []
        self.meters[t] = AverageMeter()
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)
    self.tb_logger = TBLogger(self.log_dir)
    self.f_txt = open(os.path.join(self.log_dir, '{}.txt'.format(self.label)), 'w')
def __init__(self, opt, tb_logger_suffix=''):
    self.exp_name = opt['name']
    self.use_tb_logger = opt['use_tb_logger']
    self.opt = opt['logger']
    self.log_dir = opt['path']['log']
    if not os.path.isdir(self.log_dir):
        os.mkdir(self.log_dir)
    # loss log file
    self.loss_log_path = os.path.join(self.log_dir, 'loss_log.txt')
    with open(self.loss_log_path, 'a') as log_file:
        log_file.write('=============== Time: ' + get_timestamp() + ' =============\n')
        log_file.write('================ Training Losses ================\n')
    # val results log file
    self.val_log_path = os.path.join(self.log_dir, 'val_log.txt')
    with open(self.val_log_path, 'a') as log_file:
        log_file.write('================ Time: ' + get_timestamp() + ' ===============\n')
        log_file.write('================ Validation Results ================\n')
    if self.use_tb_logger:  # and 'debug' not in self.exp_name:
        from tensorboard_logger import Logger as TensorboardLogger
        logger_dir_num = 0
        tb_logger_dir = self.log_dir.replace('experiments', 'logs')
        if not os.path.isdir(tb_logger_dir):
            os.mkdir(tb_logger_dir)
        existing_dirs = sorted(
            [dir.split('_')[0] for dir in os.listdir(tb_logger_dir)
             if os.path.isdir(os.path.join(tb_logger_dir, dir))],
            key=lambda x: int(x.split('_')[0]))
        if len(existing_dirs) > 0:
            logger_dir_num = int(existing_dirs[-1]) + 1
        self.tb_logger = TensorboardLogger(
            os.path.join(tb_logger_dir, str(logger_dir_num) + tb_logger_suffix))
class Solver(object):
    def __init__(self, face_data_loader, config):
        # Data loader
        self.face_data_loader = face_data_loader

        # Model parameters
        self.y_dim = config.y_dim
        self.num_layers = config.num_layers
        self.im_size = config.im_size
        self.g_first_dim = config.g_first_dim
        self.d_first_dim = config.d_first_dim
        self.enc_repeat_num = config.enc_repeat_num
        self.d_repeat_num = config.d_repeat_num
        self.d_train_repeat = config.d_train_repeat

        # Hyper-parameters
        self.lambda_cls = config.lambda_cls
        self.lambda_id = config.lambda_id
        self.lambda_bi = config.lambda_bi
        self.lambda_gp = config.lambda_gp
        self.enc_lr = config.enc_lr
        self.dec_lr = config.dec_lr
        self.d_lr = config.d_lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2

        # Training settings
        self.num_epochs = config.num_epochs
        self.num_epochs_decay = config.num_epochs_decay
        self.num_iters = config.num_iters
        self.num_iters_decay = config.num_iters_decay
        self.batch_size = config.batch_size
        self.trained_model = config.trained_model

        # Test settings
        self.test_model = config.test_model

        # Paths
        self.log_path = config.log_path
        self.sample_path = config.sample_path
        self.model_path = config.model_path
        self.test_path = config.test_path

        # Step sizes
        self.log_step = config.log_step
        self.sample_step = config.sample_step
        self.model_save_step = config.model_save_step

        # Build the model and set up tensorboard
        self.build_model()
        self.use_tensorboard()

        # Start with trained model
        if self.trained_model:
            self.load_trained_model()

    def build_model(self):
        # Define encoder-decoder (generator) and a discriminator
        self.Enc = Encoder(self.g_first_dim, self.enc_repeat_num)
        self.Dec = Decoder(self.g_first_dim)
        self.D = Discriminator(self.im_size, self.d_first_dim, self.d_repeat_num)

        # Optimizers
        self.enc_optimizer = torch.optim.Adam(self.Enc.parameters(), self.enc_lr, [self.beta1, self.beta2])
        self.dec_optimizer = torch.optim.Adam(self.Dec.parameters(), self.dec_lr, [self.beta1, self.beta2])
        self.d_optimizer = torch.optim.Adam(self.D.parameters(), self.d_lr, [self.beta1, self.beta2])

        if torch.cuda.is_available():
            self.Enc.cuda()
            self.Dec.cuda()
            self.D.cuda()

    def load_trained_model(self):
        self.Enc.load_state_dict(torch.load(
            os.path.join(self.model_path, '{}_Enc.pth'.format(self.trained_model))))
        self.Dec.load_state_dict(torch.load(
            os.path.join(self.model_path, '{}_Dec.pth'.format(self.trained_model))))
        self.D.load_state_dict(torch.load(
            os.path.join(self.model_path, '{}_D.pth'.format(self.trained_model))))
        print('loaded models (step: {})..!'.format(self.trained_model))

    def use_tensorboard(self):
        from tensorboard_logger import Logger
        self.logger = Logger(self.log_path)

    def update_lr(self, enc_lr, dec_lr, d_lr):
        for param_group in self.enc_optimizer.param_groups:
            param_group['lr'] = enc_lr
        for param_group in self.dec_optimizer.param_groups:
            param_group['lr'] = dec_lr
        for param_group in self.d_optimizer.param_groups:
            param_group['lr'] = d_lr

    def reset(self):
        self.enc_optimizer.zero_grad()
        self.dec_optimizer.zero_grad()
        self.d_optimizer.zero_grad()

    def to_var(self, x, volatile=False):
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x, volatile=volatile)

    def calculate_accuracy(self, x, y):
        _, predicted = torch.max(x, dim=1)
        correct = (predicted == y).float()
        accuracy = torch.mean(correct) * 100.0
        return accuracy

    def denorm(self, x):
        out = (x + 1) / 2
        return out.clamp_(0, 1)

    def one_hot(self, labels, dim):
        """Convert label indices to one-hot vectors."""
        batch_size = labels.size(0)
        out = torch.zeros(batch_size, dim)
        out[np.arange(batch_size), labels.long()] = 1
        return out

    def train(self):
"""Train attribute-guided face image synthesis model""" self.data_loader = self.face_data_loader # The number of iterations for each epoch iters_per_epoch = len(self.data_loader) sample_x = [] sample_l = [] real_y = [] for i, (images, landmark) in enumerate(self.data_loader): labels = images[1] sample_x.append(images[0]) sample_l.append(landmark[0]) real_y.append(labels) if i == 2: break # Sample inputs and desired domain labels for testing sample_x = torch.cat(sample_x, dim=0) sample_x = self.to_var(sample_x, volatile=True) sample_l = torch.cat(sample_l, dim=0) sample_l = self.to_var(sample_l, volatile=True) real_y = torch.cat(real_y, dim=0) sample_y_list = [] for i in range(self.y_dim): sample_y = self.one_hot( torch.ones(sample_x.size(0)) * i, self.y_dim) sample_y_list.append(self.to_var(sample_y, volatile=True)) # Learning rate for decaying d_lr = self.d_lr enc_lr = self.enc_lr dec_lr = self.dec_lr # Start with trained model if self.trained_model: start = int(self.trained_model.split('_')[0]) else: start = 0 # Start training start_time = time.time() for e in range(start, self.num_epochs): for i, (real_image, real_landmark) in enumerate(self.data_loader): #real_x: real image and real_l: conditional side image (landmark heatmap) real_x = real_image[0] real_label = real_image[1] real_l = real_landmark[0] # Sample fake labels randomly rand_idx = torch.randperm(real_label.size(0)) fake_label = real_label[rand_idx] real_y = self.one_hot(real_label, self.y_dim) fake_y = self.one_hot(fake_label, self.y_dim) # Convert tensor to variable real_x = self.to_var(real_x) real_l = self.to_var(real_l) real_y = self.to_var(real_y) fake_y = self.to_var(fake_y) real_label = self.to_var(real_label) fake_label = self.to_var(fake_label) #================== Train Discriminator ================== # # Input images (original image+side images) are concatenated src_output, cls_output = self.D(torch.cat([real_x, real_l], 1)) d_loss_real = -torch.mean(src_output) d_loss_cls = F.cross_entropy(cls_output, real_label) # Compute expression recognition accuracy on synthetic images if (i + 1) % self.log_step == 0: accuracies = self.calculate_accuracy( cls_output, real_label) log = [ "{:.2f}".format(acc) for acc in accuracies.data.cpu().numpy() ] print('Recognition Acc: ') print(log) # Generate outputs and compute loss with fake generated images enc_feat = self.Enc(torch.cat([real_x, real_l], 1)) fake_x, fake_l = self.Dec(enc_feat, fake_y) fake_x = Variable(fake_x.data) fake_l = Variable(fake_l.data) src_output, cls_output = self.D(torch.cat([fake_x, fake_l], 1)) d_loss_fake = torch.mean(src_output) # Discriminator losses d_loss = self.lambda_cls * d_loss_cls + d_loss_real + d_loss_fake self.reset() d_loss.backward() self.d_optimizer.step() # Compute gradient penalty loss real = torch.cat([real_x, real_l], 1) fake = torch.cat([fake_x, fake_l], 1) alpha = torch.rand(real_x.size(0), 1, 1, 1).cuda().expand_as(real) interpolated = Variable(alpha * real.data + (1 - alpha) * fake.data, requires_grad=True) output, cls_output = self.D(interpolated) grad = torch.autograd.grad(outputs=output, inputs=interpolated, grad_outputs=torch.ones( output.size()).cuda(), retain_graph=True, create_graph=True, only_inputs=True)[0] grad = grad.view(grad.size(0), -1) grad_l2norm = torch.sqrt(torch.sum(grad**2, dim=1)) d_loss_gp = torch.mean((grad_l2norm - 1)**2) # Gradient penalty loss d_loss = self.lambda_gp * d_loss_gp self.reset() d_loss.backward() self.d_optimizer.step() # Logging loss = {} loss['D/loss_real'] = d_loss_real.data[0] 
                loss['D/loss_fake'] = d_loss_fake.data[0]
                loss['D/loss_cls'] = d_loss_cls.data[0]
                loss['D/loss_gp'] = d_loss_gp.data[0]

                # ================== Train Encoder-Decoder networks ================== #
                if (i + 1) % self.d_train_repeat == 0:
                    # Original-to-target and target-to-original domain
                    enc_feat = self.Enc(torch.cat([real_x, real_l], 1))
                    fake_x, fake_l = self.Dec(enc_feat, fake_y)
                    src_output, cls_output = self.D(torch.cat([fake_x, fake_l], 1))
                    g_loss_fake = -torch.mean(src_output)

                    # rec_feat = self.Enc(fake_x)
                    rec_feat = self.Enc(torch.cat([fake_x, fake_l], 1))
                    rec_x, rec_l = self.Dec(rec_feat, real_y)

                    # Bidirectional loss of the images
                    g_loss_rec_x = torch.mean(torch.abs(real_x - rec_x))
                    g_loss_rec_l = torch.mean(torch.abs(real_l - rec_l))
                    # Bidirectional loss of the latent feature
                    g_loss_feature = torch.mean(torch.abs(enc_feat - rec_feat))
                    # Identity loss of the images
                    g_loss_identity_x = torch.mean(torch.abs(real_x - fake_x))
                    g_loss_identity_l = torch.mean(torch.abs(real_l - fake_l))
                    # Attribute classification loss for the fake generated images
                    g_loss_cls = F.cross_entropy(cls_output, fake_label)

                    # Backward + optimize the generator (encoder-decoder) losses;
                    # the decoder is updated twice for each encoder update
                    g_loss = (g_loss_fake
                              + self.lambda_bi * g_loss_rec_x
                              + self.lambda_bi * g_loss_rec_l
                              + self.lambda_bi * g_loss_feature
                              + self.lambda_id * g_loss_identity_x
                              + self.lambda_id * g_loss_identity_l
                              + self.lambda_cls * g_loss_cls)
                    self.reset()
                    g_loss.backward()
                    self.enc_optimizer.step()
                    self.dec_optimizer.step()
                    self.dec_optimizer.step()

                    # Logging generator losses
                    loss['G/loss_feature'] = g_loss_feature.data[0]
                    loss['G/loss_identity_x'] = g_loss_identity_x.data[0]
                    loss['G/loss_identity_l'] = g_loss_identity_l.data[0]
                    loss['G/loss_rec_x'] = g_loss_rec_x.data[0]
                    loss['G/loss_rec_l'] = g_loss_rec_l.data[0]
                    loss['G/loss_fake'] = g_loss_fake.data[0]
                    loss['G/loss_cls'] = g_loss_cls.data[0]

                # Print out log
                if (i + 1) % self.log_step == 0:
                    elapsed = time.time() - start_time
                    elapsed = str(datetime.timedelta(seconds=elapsed))
                    log = "Elapsed [{}], Epoch [{}/{}], Iter [{}/{}]".format(
                        elapsed, e + 1, self.num_epochs, i + 1, iters_per_epoch)
                    for tag, value in loss.items():
                        log += ", {}: {:.4f}".format(tag, value)
                    print(log)
                    for tag, value in loss.items():
                        # tensorboard_logger's Logger exposes log_value; the
                        # source called scalar_summary, which belongs to a
                        # different logger class.
                        self.logger.log_value(tag, value, e * iters_per_epoch + i + 1)

                # Synthesize images
                if (i + 1) % self.sample_step == 0:
                    fake_image_list = [sample_x]
                    for sample_y in sample_y_list:
                        enc_feat = self.Enc(torch.cat([sample_x, sample_l], 1))
                        sample_result, sample_landmark = self.Dec(enc_feat, sample_y)
                        fake_image_list.append(sample_result)
                    fake_images = torch.cat(fake_image_list, dim=3)
                    save_image(self.denorm(fake_images.data),
                               os.path.join(self.sample_path,
                                            '{}_{}_fake.png'.format(e + 1, i + 1)),
                               nrow=1, padding=0)
                    print('Generated images and saved into {}..!'.format(self.sample_path))

                # Save checkpoints
                if (i + 1) % self.model_save_step == 0:
                    torch.save(self.Enc.state_dict(),
                               os.path.join(self.model_path, '{}_{}_Enc.pth'.format(e + 1, i + 1)))
                    torch.save(self.Dec.state_dict(),
                               os.path.join(self.model_path, '{}_{}_Dec.pth'.format(e + 1, i + 1)))
                    torch.save(self.D.state_dict(),
                               os.path.join(self.model_path, '{}_{}_D.pth'.format(e + 1, i + 1)))

            # Decay learning rates
            if (e + 1) > (self.num_epochs - self.num_epochs_decay):
                d_lr -= (self.d_lr / float(self.num_epochs_decay))
                enc_lr -= (self.enc_lr / float(self.num_epochs_decay))
                dec_lr -= (self.dec_lr / float(self.num_epochs_decay))
                self.update_lr(enc_lr, dec_lr, d_lr)
                print('Decay learning rate to enc_lr: {}, d_lr: {}.'.format(enc_lr, d_lr))

    def test(self):
        """Generate face images owning target attributes (desired expressions)."""
        # Load trained models
        Enc_path = os.path.join(self.model_path, '{}_Enc.pth'.format(self.test_model))
        Dec_path = os.path.join(self.model_path, '{}_Dec.pth'.format(self.test_model))
        self.Enc.load_state_dict(torch.load(Enc_path))
        self.Dec.load_state_dict(torch.load(Dec_path))
        self.Enc.eval()
        self.Dec.eval()

        data_loader = self.face_data_loader
        for i, (real_image, real_landmark) in enumerate(data_loader):
            org_c = real_image[1]
            real_x = real_image[0]
            real_l = real_landmark[0]
            real_x = self.to_var(real_x, volatile=True)
            real_l = self.to_var(real_l, volatile=True)
            target_y_list = []
            for j in range(self.y_dim):
                target_y = self.one_hot(torch.ones(real_x.size(0)) * j, self.y_dim)
                target_y_list.append(self.to_var(target_y, volatile=True))

            # Target image generation
            fake_image_list = [real_x]
            for target_y in target_y_list:
                enc_feat = self.Enc(torch.cat([real_x, real_l], 1))
                sample_result, sample_landmark = self.Dec(enc_feat, target_y)
                fake_image_list.append(sample_result)
            fake_images = torch.cat(fake_image_list, dim=3)
            save_path = os.path.join(self.test_path, '{}_fake.png'.format(i + 1))
            save_image(self.denorm(fake_images.data), save_path, nrow=1, padding=0)
            print('Generated images and saved into "{}"..!'.format(save_path))
import sys
import os
import warnings

from model import CSRNet
from utils import save_checkpoint
from tensorboard_logger import Logger

logger = Logger(logdir="./tensorboard_logs", flush_secs=10)

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms

import numpy as np
import argparse
import json
import cv2
import dataset
import time

parser = argparse.ArgumentParser(description='PyTorch CSRNet')
parser.add_argument('train_json', metavar='TRAIN',
                    help='path to train json')
parser.add_argument('test_json', metavar='TEST',
                    help='path to test json')
parser.add_argument('--pre', '-p', metavar='PRETRAINED',
def main(args, net=None):
    global oldassignment

    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    logger = None
    if args.tensorboard:
        # One should create a folder for storing logs
        loggin_dir = os.path.join(outputdir, 'runs', 'DCC')
        if not os.path.exists(loggin_dir):
            os.makedirs(loggin_dir)
        loggin_dir = os.path.join(loggin_dir, '%s' % (args.id))
        if args.clean_log:
            remove_files_in_dir(loggin_dir)
        logger = Logger(loggin_dir)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    startepoch = 0
    kwargs = {'num_workers': 5, 'pin_memory': True} if use_cuda else {}

    # setting up dataset specific objects
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)
    numeval = len(trainset) + len(testset)

    # extracting training data from the pretrained.mat file
    data, labels, pairs, Z, sampweight = makeDCCinp(args)

    # For simplicity, a placeholder is created for each dataset and model
    load_pretraining = True if net is None else False
    if net is None:
        net = dp.load_predefined_extract_net(args)

    # reshaping data for some datasets
    if args.db == 'cmnist':
        data = data.reshape((-1, 1, 28, 28))
    elif args.db == 'ccoil100':
        data = data.reshape((-1, 3, 128, 128))
    elif args.db == 'cytf':
        data = data.reshape((-1, 3, 55, 55))
    elif args.db == 'cyale':
        data = data.reshape((-1, 1, 168, 192))

    totalset = torch.utils.data.ConcatDataset([trainset, testset])

    # computing and initializing the hyperparams
    _sigma1, _sigma2, _lambda, _delta, _delta1, _delta2, lmdb, lmdb_data = computeHyperParams(pairs, Z)
    oldassignment = np.zeros(len(pairs))
    stopping_threshold = int(math.ceil(cfg.STOPPING_CRITERION * float(len(pairs))))

    # Create dataset and random batch sampler for the finetuning stage
    trainset = DCCFT_data(pairs, data, sampweight)
    batch_sampler = DCCSampler(trainset, shuffle=True, batch_size=args.batchsize)

    # copying model params from the pretrained (SDAE) weights file
    if load_pretraining:
        load_weights(args, outputdir, net)

    # creating objects for loss functions; U's are initialized to Z here
    # Criterion1 corresponds to the reconstruction loss
    criterion1 = DCCWeightedELoss(size_average=True)
    # Criterion2 corresponds to the sum of pairwise and data loss terms
    criterion2 = DCCLoss(Z.shape[0], Z.shape[1], Z, size_average=True)

    if use_cuda:
        net.cuda()
        criterion1 = criterion1.cuda()
        criterion2 = criterion2.cuda()

    # setting up data loaders for the training and testing phase
    trainloader = torch.utils.data.DataLoader(trainset, batch_sampler=batch_sampler, **kwargs)
    testloader = torch.utils.data.DataLoader(totalset, batch_size=args.batchsize, shuffle=False, **kwargs)

    # setting up the optimizer - the bias params should have twice the
    # learning rate w.r.t. the weights params
    bias_params = filter(lambda x: ('bias' in x[0]), net.named_parameters())
    bias_params = list(map(lambda x: x[1], bias_params))
    nonbias_params = filter(lambda x: ('bias' not in x[0]), net.named_parameters())
    nonbias_params = list(map(lambda x: x[1], nonbias_params))

    optimizer = optim.Adam([{'params': bias_params, 'lr': 2 * args.lr},
                            {'params': nonbias_params},
                            {'params': criterion2.parameters(), 'lr': args.lr},
                            ], lr=args.lr, betas=(0.99, 0.999))

    # this is needed for WARM START
    if args.resume:
        filename = outputdir + '/FTcheckpoint_%d.pth.tar' % args.level
        if os.path.isfile(filename):
            print("==> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            net.load_state_dict(checkpoint['state_dict'])
            criterion2.load_state_dict(checkpoint['criterion_state_dict'])
            startepoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            _sigma1 = checkpoint['sigma1']
            _sigma2 = checkpoint['sigma2']
            _lambda = checkpoint['lambda']
            _delta = checkpoint['delta']
            _delta1 = checkpoint['delta1']
            _delta2 = checkpoint['delta2']
        else:
            print("==> no checkpoint found at '{}'".format(filename))
            raise ValueError

    # This is the actual algorithm
    flag = 0
    for epoch in range(startepoch, args.nepoch):
        if logger:
            logger.log_value('sigma1', _sigma1, epoch)
            logger.log_value('sigma2', _sigma2, epoch)
            logger.log_value('lambda', _lambda, epoch)

        train(trainloader, net, optimizer, criterion1, criterion2,
              epoch, use_cuda, _sigma1, _sigma2, _lambda, logger)
        Z, U, change_in_assign, assignment = test(testloader, net, criterion2, epoch,
                                                  use_cuda, _delta, pairs, numeval, flag, logger)

        if flag:
            # As long as the change in label assignment < threshold, DCC continues to run.
            # Note: this condition is always met in the very first epoch after the flag is set.
            # This false criterion is overwritten by checking for the condition twice.
            if change_in_assign > stopping_threshold:
                flag += 1
            if flag == 4:
                break

        if (epoch + 1) % args.M == 0:
            _sigma1 = max(_delta1, _sigma1 / 2)
            _sigma2 = max(_delta2, _sigma2 / 2)
            if _sigma2 == _delta2 and flag == 0:
                # Start checking for the stopping criterion
                flag = 1

        # Save checkpoint
        index = (epoch // args.M) * args.M
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': net.state_dict(),
                         'criterion_state_dict': criterion2.state_dict(),
                         'optimizer': optimizer.state_dict(),
                         'sigma1': _sigma1,
                         'sigma2': _sigma2,
                         'lambda': _lambda,
                         'delta': _delta,
                         'delta1': _delta1,
                         'delta2': _delta2,
                         }, index, filename=outputdir)

    output = {'Z': Z, 'U': U, 'gtlabels': labels, 'w': pairs, 'cluster': assignment}
    sio.savemat(os.path.join(outputdir, 'features'), output)
args = options.parser.parse_args()

torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
device = torch.device("cuda")

t_max = 750
t_max_ctc = 2800
if args.activity_net:
    t_max = 200
    t_max_ctc = 400

dataset = Dataset(args)

os.system('mkdir -p ./ckpt/')
os.system('mkdir -p ./logs/' + args.model_name)
logger = Logger('./logs/' + args.model_name)

model = Model(dataset.feature_size, dataset.num_class, dataset.labels101to20).to(device)

if args.eval_only and args.pretrained_ckpt is None:
    print('***************************')
    print('Pretrained Model NOT Loaded')
    print('Evaluating on Random Model')
    print('***************************')

if args.pretrained_ckpt is not None:
    model.load_state_dict(torch.load(args.pretrained_ckpt))

best_acc = 0
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005)