import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
import os

import torch  # fix: torch.cuda is used below, but torch was never imported in this view
from PIL import ImageFile

# Allow PIL to load truncated image files instead of raising mid-epoch.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Star import provides USE_TENSORBOARD, TENSORBOARD_SERVER, EXP_NAME,
# GPU_MODE, CUDA_DEVICE, ... (see fine_tune_config).
from fine_tune_config import *

## If you want to keep a track of your network on tensorboard, set USE_TENSORBOARD TO 1 in config file.
if USE_TENSORBOARD:
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    try:
        # pycrayon raises ValueError when the experiment does not exist yet
        # (first run); the old bare `except:` also hid every other failure.
        cc.remove_experiment(EXP_NAME)
    except ValueError:
        pass
    foo = cc.create_experiment(EXP_NAME)

## If you want to use the GPU, set GPU_MODE TO 1 in config file
use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

count = 0

### SECTION 2 - data loading and shuffling/augmentation/normalization : all handled by torch automatically.
net.train() params = list(net.parameters()) # optimizer = torch.optim.Adam(params[-8:], lr=lr) optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir): os.makedirs(output_dir) # tensorboad use_tensorboard = use_tensorboard and CrayonClient is not None if use_tensorboard: cc = CrayonClient(hostname='127.0.0.1') if remove_all_log: cc.remove_all_experiments() if exp_name is None: exp_name = datetime.now().strftime('vgg16_%m-%d_%H-%M') exp = cc.create_experiment(exp_name) else: exp = cc.open_experiment(exp_name) # training train_loss = 0 tp, tf, fg, bg = 0., 0., 0, 0 step_cnt = 0 re_cnt = False t = Timer() t.tic()
# create directories for plots, models & evaluation plots_dir = f"{args.base_plots_dir}/{exp_name}" logger.info(f"For plotting using dir: {plots_dir}") plots_dir = create_directory(plots_dir, delete_if_exists=True) save_dir = f"{args.base_models_dir}/{exp_name}" logger.info(f"For model saving using dir: {save_dir}") save_dir = create_directory(save_dir, delete_if_exists=True) export_dir = f"{args.base_evaluation_dir}/{exp_name}" logger.info(f"For exporting final evaluation using dir: {export_dir}") export_dir = create_directory(export_dir, delete_if_exists=True) # Connect to server & start experiment ccexp = crayon_create_experiment(exp_name, CrayonClient()) # seed logger.info(f"Using seed: {args.numpy_seed}") np.random.seed(args.numpy_seed) # metrics to send to tensorboard mets = {"mean_discrepancy": 0.0, "identity_loss": 0.0} logger.info(f"Metrics which will be logged: {mets.keys()}") # initialization & training iterations N_init = args.n_init logger.info(f"Will train initialization for {N_init} iterations") N_train = args.n_train logger.info(f"Will train transport map for {N_train} iterations")
print(b.shape) data = b data=data.transpose() import torch import numpy as np import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torchvision import datasets, transforms from torch.autograd import Variable import numpy as np from pycrayon import CrayonClient import time cc = CrayonClient(hostname="10.150.6.120") try: cc.remove_experiment("AnalyzeConv4") except: pass try: OMIE = cc.create_experiment("AnalyzeConv4") except: pass ## ## noise level one ## dimension 2 ### z는 따로 추출
import torch
import numpy as np
import torch.nn as nn
from FCDensenet import FCDensenet
from utils import train, valid, test
from pycrayon import CrayonClient

# Tensorboard (Crayon) experiment used to log training curves.
cc = CrayonClient(hostname="localhost")

# data is loaded as an indexable collection of splits; indices 0/2/4 below
# are the train/valid/test inputs -- TODO confirm against how
# polyp_data.pth was written.
data = torch.load('polyp_data.pth')
f = "FCDensenet.pth"  # checkpoint filename
FCDensenet_experiment = cc.create_experiment("FCDensenet_experiment")

batch_size = 2
n_tr_batch = len(data[0]) // batch_size
n_va_batch = len(data[2]) // batch_size
n_te_batch = len(data[4]) // batch_size
n_epochs = 500
n_c = 2   # number of output classes
k = 16    # presumably the DenseNet growth rate -- verify against FCDensenet
cuda = True

# Build the model once; only device placement depends on the flag.
# (The old code compared `cuda == True` and duplicated the constructor call.)
model = FCDensenet(n_c, k, nn.ReLU(inplace=True))
if cuda:
    model = model.cuda()

optimizer = torch.optim.RMSprop(model.parameters())
criterion = nn.CrossEntropyLoss()
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from pycrayon import CrayonClient
import time

# 4-D Gaussian parameters: fixed mean, identity covariance.
mean = np.array([1, 2, 3, 1])
cov = np.array([[1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1]])
# Example draw (kept for reference): np.random.multivariate_normal(mean, cov, 5000)

# Tensorboard (Crayon) logging: recreate the experiment from scratch.
cc = CrayonClient(hostname="10.150.6.120")
try:
    # remove_experiment raises ValueError when the experiment does not exist
    # (e.g. on the very first run) -- the unguarded call used to crash there.
    cc.remove_experiment("OMIE_5")
except ValueError:
    pass
OMIE = cc.create_experiment("OMIE_5")

###
### noise level one
### dimension 2
### z is extracted separately
###

# Hyperparameters.
input_size = 4
hidden_size = 8
hidden_size_ = 3
num_classes = 1
num_epochs = 9
learning_rate = 0.0001
debug_mode = True
loss_score, ans_loss_score, dis_loss_score, acc_score = validate_bireader(net, dev_loader, params) print("validation loss = {0:.10}, validation accuracy = {1:.5}". format(loss_score, acc_score)) print("answerer loss = {0:.10}, discrim. loss = {1:.10}". format(ans_loss_score, dis_loss_score)) loss_score, ans_loss_score, dis_loss_score, acc_score = validate_bireader(net, test_loader, params) print("validation loss = {0:.10}, validation accuracy = {1:.5}". format(loss_score, acc_score)) print("answerer loss = {0:.10}, discrim. loss = {1:.10}". format(ans_loss_score, dis_loss_score)) else: if arg.log: # crayon client cc = CrayonClient(hostname="localhost", port=8889) existing = len(cc.get_experiment_names()) ce = cc.create_experiment("run_{0}".format(existing), zip_file=None) print("now training...") train_1 = pd.read_pickle("../input_data/train_{0}.pkl".format(params['lang'])) train_2 = pd.read_pickle("../input_data/train_{0}.pkl".format(params['lang2'])) train_loader = tud.DataLoader(BiQADataset(train_1, train_2, nlp_1, nlp_2, rev_dic_1, rev_dic_2, relabel=params['relabel'], l2_supersample=params['l2_supersample']), batch_size=params['batch_size'], pin_memory=True, num_workers=3, shuffle=True) dev_1 = pd.read_pickle("../input_data/dev_{0}.pkl".format(params['lang'])) dev_2 = pd.read_pickle("../input_data/dev_{0}.pkl".format(params['lang2']))
def __init__(self, opt):
    """Store run configuration taken from `opt`.

    Parameters:
        opt: options namespace; fields read here: train, cc, cuda.
    """
    self.opt = opt
    # NOTE(review): `nums` is not defined in this scope -- this raises
    # NameError unless a module-level `nums` exists; likely `opt.nums` was meant.
    self.nums = nums
    self.istrain = opt.train
    # When opt.cc is truthy, connect to a local Crayon (tensorboard) server;
    # otherwise keep the falsy opt.cc value itself (i.e. no client).
    self.cc = CrayonClient(hostname="localhost") if opt.cc else opt.cc
    self.cuda = opt.cuda
class _baseMuitlModel(object):
    '''Base model that combines netG(s) and netD into one GAN.

    (NOTE(review): class name typo -- "Muitl" for "Multi".)

    @Params:
        - opt: options for configuring the GAN model
        - train: train or test mode
        - nums: how many netGs
        - cc: whether to use a crayon (tensorboard) client
        - cuda: whether to use CUDA
    '''
    def __init__(self, opt):
        self.opt = opt
        # NOTE(review): `nums` is undefined in this scope (NameError unless a
        # global `nums` exists); the class docstring suggests `opt.nums`.
        self.nums = nums
        self.istrain = opt.train
        # Connect to a local Crayon server only when opt.cc is truthy;
        # otherwise keep the falsy value (no client).
        self.cc = CrayonClient(hostname="localhost") if opt.cc else opt.cc
        self.cuda = opt.cuda

    def create_tensorboard(self):
        '''Use the dockerised tensorboard (Crayon): wipe old experiments and
        create one D-loss experiment plus one experiment per generator.
        '''
        if self.cc:
            self.cc.remove_all_experiments()
            self.D_exp = create_sigle_experiment(self.cc, 'D_loss')
            self.G_exps = []
            for i in range(self.nums):
                # NOTE(review): the per-generator name below is built but never
                # used -- every experiment is created under the name 'G_loss'.
                G_loss_experiment_name = 'G_loss_{}'.format(i)
                G_exp = create_sigle_experiment(self.cc, 'G_loss')
                self.G_exps.append(G_exp)

    def draft_data(self, input):
        '''Load a batch from the dataset loader into X/Z. (Hook: no-op here.)'''
        pass

    def backward_D(self):
        '''Backward pass for netD. (Hook: no-op here.)'''
        pass

    def train(self):
        '''Train the GAN. (Hook: no-op here.)'''
        pass

    def test(self):
        '''Test the GAN. (Hook: no-op here.)'''
        pass

    def save_network(self, it, savepath):
        '''Save checkpoints of netG and netD under `savepath`.

        @Params:
            - it: iteration number (embedded in the filename)
            - savepath: directory that receives the .pth files
        '''
        torch.save(self.netG.state_dict(), '%s/netG_epoch_%d.pth' % (savepath, it))
        torch.save(self.netD.state_dict(), '%s/netD_epoch_%d.pth' % (savepath, it))

    def load_networkG(self, g_network_path):
        '''Load netG parameters from `g_network_path`.'''
        self.netG.load_state_dict(torch.load(g_network_path))

    def load_networkD(self, d_network_path):
        '''Load netD parameters from `d_network_path`.'''
        self.netD.load_state_dict(torch.load(d_network_path))

    def save_image(self, fake, it, savepath):
        '''Save a grid image of netG's output.

        @Params:
            - fake: output Variable from netG
            - it: iteration number (embedded in the filename)
            - savepath: directory that receives the .png
        '''
        vutils.save_image(fake.data, '%s/fake_samples_epoch_%03d.png' % (savepath, it))
net.cuda() net.train() print('load net succ...') # optimizer start_epoch = 0 lr = cfg.init_learning_rate optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=cfg.momentum, weight_decay=cfg.weight_decay) # tensorboad use_tensorboard = cfg.use_tensorboard and CrayonClient is not None if use_tensorboard: cc = CrayonClient(hostname='127.0.0.1') # if remove_all_log: # cc.remove_all_experiments() if start_epoch == 0: exp = cc.create_experiment(cfg.exp_name) else: exp = cc.open_experiment(cfg.exp_name) train_loss = 0 t = Timer() for step in range(start_epoch * imdb.batch_per_epoch, cfg.max_epoch * imdb.batch_per_epoch): t.tic() # batch batch = imdb.next_batch() im = batch['images']
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from RAN import restorator, discirminator
from patch_wise import patch
from pycrayon import CrayonClient
import time

# Tensorboard (Crayon) logging: recreate one experiment per tracked curve.
cc = CrayonClient(hostname="localhost", port=8889)
for _exp_name in ('d_real_error', 'd_fake_error', 'g_error'):
    try:
        # remove_experiment raises ValueError when the experiment does not
        # exist yet (first run) -- the unguarded calls used to crash there.
        cc.remove_experiment(_exp_name)
    except ValueError:
        pass
d_real_errorC = cc.create_experiment('d_real_error')
d_fake_errorC = cc.create_experiment('d_fake_error')
g_errorC = cc.create_experiment('g_error')


def extract(v):
    """Return the values of tensor/Variable `v` as a flat Python list."""
    return v.data.storage().tolist()


# Parenthesised print works on both Python 2 and 3 (the original used the
# Python-2-only statement form).
print('Starting my Restoration Adversarial Net...')
torch.manual_seed(123)
torch.cuda.manual_seed(123)

patchSize = 64  # side length of square patches
patches = patch()
def main(): config = DefaultConfigs() train_input_root = os.path.join(config.data) train_labels_file = 'labels.csv' if config.output: if not os.path.exists(config.output): os.makedirs(config.output) output_base = config.output else: if not os.path.exists(config.output): os.makedirs(config.output) output_base = config.output exp_name = '-'.join([ datetime.now().strftime("%Y%m%d-%H%M%S"), config.model, str(config.img_size), 'f' + str(config.fold) ]) mask_exp_name = '-'.join( [config.model, str(config.img_size), 'f' + str(config.fold)]) mask_exp_name = glob.glob( os.path.join(output_base, 'train', '*' + mask_exp_name)) if config.resume and mask_exp_name: output_dir = mask_exp_name else: output_dir = get_outdir(output_base, 'train', exp_name) batch_size = config.batch_size test_batch_size = config.test_batch_size num_epochs = config.epochs img_type = config.image_type img_size = (config.img_size, config.img_size) num_classes = get_tags_size(config.labels) torch.manual_seed(config.seed) dataset_train = HumanDataset( train_input_root, train_labels_file, train=True, multi_label=config.multi_label, img_type=img_type, img_size=img_size, fold=config.fold, ) #sampler = WeightedRandomOverSampler(dataset_train.get_sample_weights()) loader_train = data.DataLoader( dataset_train, batch_size=batch_size, shuffle=True, #sampler=sampler, num_workers=config.num_processes) dataset_eval = HumanDataset( train_input_root, train_labels_file, train=False, multi_label=config.multi_label, img_type=img_type, img_size=img_size, test_aug=config.tta, fold=config.fold, ) loader_eval = data.DataLoader(dataset_eval, batch_size=test_batch_size, shuffle=False, num_workers=config.num_processes) # model = model_factory.create_model( # config.model, # pretrained=True, # num_classes=num_classes, # drop_rate=config.drop, # global_pool=config.gp) model = get_net(config.model, num_classes, config.drop, config.channels) if not config.no_cuda: if config.num_gpu > 1: model = torch.nn.DataParallel(model, 
device_ids=list(range( config.num_gpu))).cuda() else: model.cuda() if config.opt.lower() == 'sgd': optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=config.momentum, weight_decay=config.weight_decay) elif config.opt.lower() == 'adam': optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay) elif config.opt.lower() == 'adadelta': optimizer = optim.Adadelta(model.parameters(), lr=config.lr, weight_decay=config.weight_decay) elif config.opt.lower() == 'rmsprop': optimizer = optim.RMSprop(model.parameters(), lr=config.lr, alpha=0.9, momentum=config.momentum, weight_decay=config.weight_decay) elif config.opt.lower() == 'yellowfin': optimizer = YFOptimizer(model.parameters(), lr=config.lr, weight_decay=config.weight_decay, clip_thresh=2) else: assert False and "Invalid optimizer" if not config.decay_epochs: lr_scheduler = ReduceLROnPlateau(optimizer, patience=8) else: lr_scheduler = None if config.class_weights: class_weights = torch.from_numpy( dataset_train.get_class_weights()).float() class_weights_norm = class_weights / class_weights.sum() if not config.no_cuda: class_weights = class_weights.cuda() class_weights_norm = class_weights_norm.cuda() else: class_weights = None class_weights_norm = None if config.loss.lower() == 'nll': #assert not args.multi_label and 'Cannot use crossentropy with multi-label target.' 
loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights) elif config.loss.lower() == 'mlsm': assert config.multi_label loss_fn = torch.nn.MultiLabelSoftMarginLoss(weight=class_weights) else: assert config and "Invalid loss function" if not config.no_cuda: loss_fn = loss_fn.cuda() # optionally resume from a checkpoint start_epoch = 1 if config.resume: if os.path.isfile(config.resume): print("=> loading checkpoint '{}'".format(config.resume)) checkpoint = torch.load(config.resume) config.start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( config.resume, checkpoint['epoch'])) start_epoch = checkpoint['epoch'] else: print("=> no checkpoint found at '{}'".format(config.resume)) exit(-1) use_tensorboard = not config.no_tb and CrayonClient is not None if use_tensorboard: hostname = '127.0.0.1' port = 8889 host_port = config.tbh.split(':')[:2] if len(host_port) == 1: hostname = host_port[0] elif len(host_port) >= 2: hostname, port = host_port[:2] try: cc = CrayonClient(hostname=hostname, port=port) try: cc.remove_experiment(exp_name) except ValueError: pass exp = cc.create_experiment(exp_name) except Exception as e: exp = None print( "Error (%s) connecting to Tensoboard/Crayon server. Giving up..." 
% str(e)) else: exp = None # Optional fine-tune of only the final classifier weights for specified number of epochs (or part of) if not config.resume and config.ft_epochs > 0.: if config.opt.lower() == 'adam': finetune_optimizer = optim.Adam(model.get_fc().parameters(), lr=config.ft_lr, weight_decay=config.weight_decay) else: finetune_optimizer = optim.SGD(model.get_fc().parameters(), lr=config.ft_lr, momentum=config.momentum, weight_decay=config.weight_decay) finetune_epochs_int = int(np.ceil(config.ft_epochs)) finetune_final_batches = int( np.ceil((1 - (finetune_epochs_int - config.ft_epochs)) * len(loader_train))) print(finetune_epochs_int, finetune_final_batches) for fepoch in range(1, finetune_epochs_int + 1): if fepoch == finetune_epochs_int and finetune_final_batches: batch_limit = finetune_final_batches else: batch_limit = 0 train_epoch(fepoch, model, loader_train, finetune_optimizer, loss_fn, config, class_weights_norm, output_dir, batch_limit=batch_limit) step = fepoch * len(loader_train) score, _ = validate(step, model, loader_eval, loss_fn, config, 0.3, output_dir) score_metric = 'f2' best_loss = None best_f2 = None threshold = 0.2 try: for epoch in range(start_epoch, num_epochs + 1): if config.decay_epochs: adjust_learning_rate(optimizer, epoch, initial_lr=config.lr, decay_epochs=config.decay_epochs) train_metrics = train_epoch(epoch, model, loader_train, optimizer, loss_fn, config, class_weights_norm, output_dir, exp=exp) step = epoch * len(loader_train) eval_metrics, latest_threshold = validate(step, model, loader_eval, loss_fn, config, threshold, output_dir, exp=exp) if lr_scheduler is not None: lr_scheduler.step(eval_metrics['eval_loss']) rowd = OrderedDict(epoch=epoch) rowd.update(train_metrics) rowd.update(eval_metrics) with open(os.path.join(output_dir, 'summary.csv'), mode='a') as cf: dw = csv.DictWriter(cf, fieldnames=rowd.keys()) if best_loss is None: # first iteration (epoch == 1 can't be used) dw.writeheader() dw.writerow(rowd) best = False 
if best_loss is None or eval_metrics['eval_loss'] < best_loss[1]: best_loss = (epoch, eval_metrics['eval_loss']) if score_metric == 'loss': best = True if best_f2 is None or eval_metrics['eval_f2'] > best_f2[1]: best_f2 = (epoch, eval_metrics['eval_f2']) if score_metric == 'f2': best = True save_checkpoint( { 'epoch': epoch + 1, 'arch': config.model, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'threshold': latest_threshold, 'config': config }, is_best=best, filename=os.path.join(config.checkpoint_path, 'checkpoint-%d.pth.tar' % epoch), output_dir=output_dir) except KeyboardInterrupt: pass print('*** Best loss: {0} (epoch {1})'.format(best_loss[1], best_loss[0])) print('*** Best f2: {0} (epoch {1})'.format(best_f2[1], best_f2[0]))
import argparse parser = argparse.ArgumentParser(description='Lets win charades') parser.add_argument('-name', type=str, required=False, default="No name provided", help='Name of experiment') parser.add_argument('-resume', type=str, required=False, default=None, help='Path to resume model') args = parser.parse_args() print(args.name) if config.USE_GPU: torch.cuda.set_device(config.TORCH_DEVICE) cc = None if config.LOG: from pycrayon import CrayonClient os.system('') cc = CrayonClient(hostname="server_machine_address") from models.inflated_inception_attention import InceptionAttention net = InceptionAttention() from config import * from utils import * actionClassifier = getActionClassifier() device_ids=[0, 1, 2, 3] def parallel(var, device_ids=[0, 1, 2, 3]): return torch.nn.DataParallel(var, device_ids=device_ids) # Resume training from pretrained model resume_epoch = 0
h = self.bn(h) for f in self.res: h = f(h) y = self.fc(h) return y if __name__ == '__main__': # GPUフラグ gpu_fg = util.gpuCheck(sys.argv) if gpu_fg >= 0: cuda.check_cuda_available() xp = cuda.cupy if gpu_fg >= 0 else np # pycrayon 初期化 cc = CrayonClient(hostname="192.168.1.90", port=8889) # delete this experiment from the server try: cc.remove_experiment("ResNet train") cc.remove_experiment("ResNet test") except: pass # create a new experiment try: tb_res_train = cc.create_experiment("ResNet train") tb_res_test = cc.create_experiment("ResNet test") except: tb_res_train = cc.open_experiment("ResNet train") tb_res_test = cc.open_experiment("ResNet test")
z_dim = 100 h_dim = 128 x_dim_w, x_dim_h = train_loader.dataset.train_data.size()[1:3] x_dim = x_dim_w * x_dim_h train_size = train_loader.dataset.train_data.size()[0] y_dim = 10 lr = 1e-3 cnt = 0 display_cnt = 100 iter = 2 nets_num = 10 cuda = False netD_continue_trian = True cc = CrayonClient(hostname="localhost") cc.remove_all_experiments() D_exp = create_sigle_experiment(cc, 'D_loss') D_preb_real = create_sigle_experiment(cc, 'preb_real') D_preb_fake = create_sigle_experiment(cc, 'preb_fake') G_exps = create_experiments(cc, 10) netG_indeps = create_nets(config['G'][2], z_dim, nets_num) netG_share = build_netG(config['G'][3], h_dim) netD = build_netD(config['D'][2], x_dim) print netG_indeps print netG_share init_network(netG_share) init_network(netG_indeps)
###################################### y = self.fc8(h) ###################################### return y if __name__ == '__main__': # GPUフラグ gpu_fg = util.gpuCheck(sys.argv) if gpu_fg >= 0: cuda.check_cuda_available() xp = cuda.cupy if gpu_fg >= 0 else np # pycrayon 初期化 cc = CrayonClient(hostname="192.168.1.198", port=8889) # delete this experiment from the server try: cc.remove_experiment("AlexNet train (Adam)") cc.remove_experiment("AlexNet test (Adam)") except: pass # create a new experiment try: tb_alex_train = cc.create_experiment("AlexNet train (Adam)") tb_alex_test = cc.create_experiment("AlexNet test (Adam)") except: tb_alex_train = cc.open_experiment("AlexNet train (Adam)") tb_alex_test = cc.open_experiment("AlexNet test (Adam)")
def parse(self): opt = self.gather_options() # opt.isTrain = self.isTrain # train or test model = opt.model_name dataset_name = opt.dataset #dataset name - used for saving model file exp = 'v7-{}-{}-{}/'.format(dataset_name, model, datetime.now().strftime('exp-%m-%d_%H-%M')) expr_dir = './saved_models/{}/'.format(exp) #model files are saved here opt.crop_size = map(int, opt.crop_size.split('x')) if opt.save_model_para and not os.path.exists(expr_dir): makedirs('./saved_models') makedirs(expr_dir) makedirs(expr_dir+'./sup/') else: expr_dir = './temp1/' makedirs(expr_dir) if not os.path.exists(expr_dir+'./sup/'): os.mkdir(expr_dir+'./sup/') opt.expr_dir = expr_dir logger = logging.getLogger() fh = logging.FileHandler("{0}/{1}.log".format(expr_dir, 'log'), mode='w') fh.setFormatter(logging.Formatter(fmt="%(asctime)s %(message)s", datefmt="%d-%H:%M")) logger.addHandler(fh) opt.logger = logger self.opt = opt #Tensorboard config use_tensorboard = opt.use_tensorboard remove_all_log = False # remove all historical experiments in TensorBoardO use_tensorboard = use_tensorboard and CrayonClient is not None self.vis_exp = None if use_tensorboard: cc = CrayonClient(hostname='8.8.8.8', port=7879) if remove_all_log: cc.remove_all_experiments() random.seed(time.time()) vis_exp_name = exp + str(random.random()) opt.vis_exp_name = vis_exp_name self.vis_exp = cc.create_experiment(vis_exp_name) import socket hostname = socket.gethostname() # set gpu ids str_ids = opt.gpus.split(',') opt.gpus = [] for str_id in str_ids: id = int(str_id) if id >= 0: opt.gpus.append(id) if len(opt.gpus) > 0: torch.cuda.set_device(opt.gpus[0]) self.opt = opt self.print_options(opt) return self.opt
class CrayonWrapper:
    """
    Wraps PyCrayon (https://github.com/torrvision/crayon), a language-agnostic
    interface to TensorBoard.

    Pulls newly logged values from the chief handle and mirrors them both into
    the Crayon server (for TensorBoard) and into an in-memory dict that can be
    exported as JSON alongside Crayon's zip dumps.
    """

    def __init__(self, name, runs_distributed, runs_cluster, chief_handle,
                 path_log_storage=None, crayon_server_address="localhost"):
        self._name = name
        self._path_log_storage = path_log_storage
        if path_log_storage is not None:
            create_dir_if_not_exist(path_log_storage)
        self._chief_handle = chief_handle
        self._crayon = CrayonClient(hostname=crayon_server_address)
        self._experiments = {}
        self.clear()
        self._custom_logs = {
        }  # dict of exps containing dict of graph names containing lists of {step: val, } dicts
        # Abstraction over ray: local call vs. distributed/cluster remote call.
        self._ray = MaybeRay(runs_distributed=runs_distributed,
                             runs_cluster=runs_cluster)

    @property
    def name(self):
        return self._name

    @property
    def path_log_storage(self):
        return self._path_log_storage

    def clear(self):
        """
        Forget the locally tracked experiments.
        Does NOT clear crayon's internal experiment logs and files.
        """
        self._experiments = {}

    def export_all(self, iter_nr):
        """
        Exports all logs of the current run in Tensorboard's format (crayon
        zips) and as json files, under <storage>/<name>/<iter_nr>/.
        """
        if self._path_log_storage is not None:
            path_crayon = ospj(self._path_log_storage, str(self._name),
                               str(iter_nr), "crayon")
            path_json = ospj(self._path_log_storage, str(self._name),
                             str(iter_nr), "as_json")
            create_dir_if_not_exist(path=path_crayon)
            create_dir_if_not_exist(path=path_json)
            for e in self._experiments.values():
                e.to_zip(filename=ospj(path_crayon, e.xp_name + ".zip"))
            write_dict_to_file_json(dictionary=self._custom_logs,
                                    _dir=path_json,
                                    file_name="logs")

    def update_from_log_buffer(self):
        """
        Pulls newly added logs from the chief onto whatever worker
        CrayonWrapper runs on. It then adds all these new logs to Tensorboard
        (i.e. PyCrayon's docker container)
        """
        new_v, exp_names = self._get_new_vals()

        # Ensure a crayon experiment exists for every experiment name seen.
        for e in exp_names:
            if e not in self._experiments.keys():
                self._custom_logs[e] = {}
                try:
                    self._experiments[e] = self._crayon.create_experiment(
                        xp_name=e)
                except ValueError:
                    # Name already exists on the server from an earlier run:
                    # drop the stale experiment and recreate it.
                    self._crayon.remove_experiment(xp_name=e)
                    self._experiments[e] = self._crayon.create_experiment(
                        xp_name=e)

        # Mirror each new data point into crayon and the local JSON log.
        for name, vals_dict in new_v.items():
            for graph_name, data_points in vals_dict.items():
                for data_point in data_points:
                    step = int(data_point[0])
                    val = data_point[1]
                    self._experiments[name].add_scalar_value(name=graph_name,
                                                             step=step,
                                                             value=val)
                    if graph_name not in self._custom_logs[name].keys():
                        self._custom_logs[name][graph_name] = []
                    self._custom_logs[name][graph_name].append({step: val})

    def _get_new_vals(self):
        """
        Returns:
            dict: Pulls and returns newly added logs from the chief onto
            whatever worker CrayonWrapper runs on.
        """
        return self._ray.get(
            self._ray.remote(self._chief_handle.get_new_values))
def remove_all_experiments(hostname, port):
    """Delete every experiment stored on the Crayon server at hostname:port.

    DANGER: this is irreversible -- only call it when you are certain.
    """
    client = CrayonClient(hostname=hostname, port=port)
    client.remove_all_experiments()
def train(model, data, params): """ Trains a model. Inputs: model (ATISModel): The model to train. data (ATISData): The data that is used to train. params (namespace): Training parameters. """ # Get the training batches. log = Logger(os.path.join(params.logdir, params.logfile), "w") num_train_original = atis_data.num_utterances(data.train_data) log.put("Original number of training utterances:\t" + str(num_train_original)) eval_fn = evaluate_utterance_sample trainbatch_fn = data.get_utterance_batches trainsample_fn = data.get_random_utterances validsample_fn = data.get_all_utterances batch_size = params.batch_size if params.interaction_level: batch_size = 1 eval_fn = evaluate_interaction_sample trainbatch_fn = data.get_interaction_batches trainsample_fn = data.get_random_interactions validsample_fn = data.get_all_interactions maximum_output_length = params.train_maximum_sql_length train_batches = trainbatch_fn(batch_size, max_output_length=maximum_output_length, randomize=not params.deterministic) if params.num_train >= 0: train_batches = train_batches[:params.num_train] training_sample = trainsample_fn(params.train_evaluation_size, max_output_length=maximum_output_length) valid_examples = validsample_fn(data.valid_data, max_output_length=maximum_output_length) num_train_examples = sum([len(batch) for batch in train_batches]) num_steps_per_epoch = len(train_batches) log.put( "Actual number of used training examples:\t" + str(num_train_examples)) log.put("(Shortened by output limit of " + str(maximum_output_length) + ")") log.put("Number of steps per epoch:\t" + str(num_steps_per_epoch)) log.put("Batch size:\t" + str(batch_size)) print( "Kept " + str(num_train_examples) + "/" + str(num_train_original) + " examples") print( "Batch size of " + str(batch_size) + " gives " + str(num_steps_per_epoch) + " steps per epoch") # Keeping track of things during training. epochs = 0 patience = params.initial_patience learning_rate_coefficient = 1. 
previous_epoch_loss = float('inf') maximum_validation_accuracy = 0. maximum_string_accuracy = 0. crayon = CrayonClient(hostname="localhost") experiment = crayon.create_experiment(params.logdir) countdown = int(patience) keep_training = True while keep_training: log.put("Epoch:\t" + str(epochs)) model.set_dropout(params.dropout_amount) model.set_learning_rate( learning_rate_coefficient * params.initial_learning_rate) # Run a training step. if params.interaction_level: epoch_loss = train_epoch_with_interactions( train_batches, params, model, randomize=not params.deterministic) else: epoch_loss = train_epoch_with_utterances( train_batches, model, randomize=not params.deterministic) log.put("train epoch loss:\t" + str(epoch_loss)) experiment.add_scalar_value("train_loss", epoch_loss, step=epochs) model.set_dropout(0.) # Run an evaluation step on a sample of the training data. train_eval_results = eval_fn(training_sample, model, params.train_maximum_sql_length, "train-eval", gold_forcing=True, metrics=TRAIN_EVAL_METRICS)[0] for name, value in train_eval_results.items(): log.put( "train final gold-passing " + name.name + ":\t" + "%.2f" % value) experiment.add_scalar_value( "train_gold_" + name.name, value, step=epochs) # Run an evaluation step on the validation set. 
valid_eval_results = eval_fn(valid_examples, model, "valid-eval", gold_forcing=True, metrics=VALID_EVAL_METRICS)[0] for name, value in valid_eval_results.items(): log.put("valid gold-passing " + name.name + ":\t" + "%.2f" % value) experiment.add_scalar_value( "valid_gold_" + name.name, value, step=epochs) valid_loss = valid_eval_results[Metrics.LOSS] valid_token_accuracy = valid_eval_results[Metrics.TOKEN_ACCURACY] string_accuracy = valid_eval_results[Metrics.STRING_ACCURACY] if valid_loss > previous_epoch_loss: learning_rate_coefficient *= params.learning_rate_ratio log.put( "learning rate coefficient:\t" + str(learning_rate_coefficient)) experiment.add_scalar_value( "learning_rate", learning_rate_coefficient, step=epochs) previous_epoch_loss = valid_loss saved = False if valid_token_accuracy > maximum_validation_accuracy: saved = True maximum_validation_accuracy = valid_token_accuracy patience = patience * params.patience_ratio countdown = int(patience) last_save_file = os.path.join(params.logdir, "save_" + str(epochs)) model.save(last_save_file) log.put("maximum accuracy:\t" + str(maximum_validation_accuracy)) log.put("patience:\t" + str(patience)) log.put("save file:\t" + str(last_save_file)) if not saved and string_accuracy > maximum_string_accuracy: maximum_string_accuracy = string_accuracy log.put( "maximum string accuracy:\t" + str(maximum_string_accuracy)) last_save_file = os.path.join(params.logdir, "save_" + str(epochs)) model.save(last_save_file) send_slack_message( username=params.logdir, message="Epoch " + str(epochs) + ": " + str(string_accuracy) + " validation accuracy; countdown is " + str(countdown), channel="models") if countdown <= 0: keep_training = False countdown -= 1 log.put("countdown:\t" + str(countdown)) experiment.add_scalar_value("countdown", countdown, step=epochs) log.put("") epochs += 1 log.put("Finished training!") send_slack_message(username=params.logdir, message="Done training!!", channel="@alsuhr") log.close() return 
last_save_file
import scipy.stats as st import numpy as np from pavooc.scoring.feature_extraction import extract_features, \ split_test_train_valid, normalize_features from pavooc.scoring.azimuth_dataset import load_dataset from pavooc.scoring.dataloader import DataLoader from pavooc.config import BATCH_SIZE, WEIGHTS_DIR, \ CONSERVATION_FEATURES_FILE, SCALER_FILE, DATADIR if cuda.is_available(): import torch.backends.cudnn as cudnn cudnn.benchmark = True try: crayon = CrayonClient(hostname="localhost", port=8889) except (ValueError, RuntimeError): crayon = None try: os.mkdir(WEIGHTS_DIR) except FileExistsError: pass def to_np(x): return x.data.cpu().numpy() def _init_model(feature_length, model_class, loss, learning_rate): model = model_class(feature_length)
return parser.parse_args() if __name__ == '__main__': args = __pars_args__() master_net = DFP_Network( (args.env_size**2) * 3, # observation_size = (args.env_size*args.env_size)*3 = battel_ground*colors num_offset=len(args.offset), a_size=args.action_space, num_measurements=args.num_measurements, is_master=True) master_net.share_memory() cc = CrayonClient(hostname="localhost") # cc.remove_all_experiments() processes = [] # p = mp.Process(target=work, args=(0, args, master_net, exp_buff, optimizer)) eval net # p.start() # processes.append(p) for rank in range(0, args.num_processes): # for rank in range(0, 1): p = mp.Process(target=work, args=(rank, args, master_net, cc, None)) p.start() processes.append(p) for p in processes: p.join()
if rnd <= 0: break res.append(i) cw = i if cw == stop: break if nchars and len(res) > nchars: break return res if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('corpus', help='Path to the corpus file.') parser.add_argument('crayserver', help='Server location for crayon.') parser.add_argument('expname', help='Experiment name') args = parser.parse_args() # Connect to the server cc = CrayonClient(hostname=args.crayserver) #Create a new experiment myexp = cc.create_experiment(args.expname) train = util.CharsCorpusReader(args.corpus, begin="<s>") vocab = util.Vocab.from_corpus(train) VOCAB_SIZE = vocab.size() model = dy.ParameterCollection() trainer = dy.SimpleSGDTrainer(model) #lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.SimpleRNNBuilder) lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.LSTMBuilder)
import numpy as np
import torch  # fix: torch.cuda.set_device below needs torch, which was never imported here
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
import os
from PIL import ImageFile

# Let PIL load partially-written/truncated image files instead of raising IOError.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Provides USE_TENSORBOARD, TENSORBOARD_SERVER, EXP_NAME, GPU_MODE, CUDA_DEVICE.
from fine_tuning_config_file import *

## If you want to keep a track of your network on tensorboard, set USE_TENSORBOARD TO 1 in config file.
if USE_TENSORBOARD:
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    try:
        # Remove any stale experiment with the same name so we start from a clean slate.
        cc.remove_experiment(EXP_NAME)
    except Exception:
        # fix: was a bare `except:` (also caught KeyboardInterrupt/SystemExit).
        # remove_experiment raises when the experiment does not exist; ignoring that is intended.
        pass
    foo = cc.create_experiment(EXP_NAME)

## If you want to use the GPU, set GPU_MODE TO 1 in config file
use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

count = 0
def eval_loop(counter, args, shared_model, model_eval):
    """Evaluator process for an A3C-style trainer.

    Repeatedly syncs a private copy of ``shared_model``, runs ``args.n_eval``
    evaluation episodes, logs rewards to visdom/crayon, and checkpoints the
    model.  ``counter`` is a shared step counter (has a ``.value``) written by
    the training workers; ``model_eval`` runs one episode and returns a result
    consumed via ``EvalResult``.  Runs until ``counter.value >= args.n_steps``.
    """
    try:
        SEC_PER_DAY = 24 * 60 * 60
        # Episode losing a life is NOT terminal here; cap episode wall time at 5 minutes.
        env = build_env(args.type, args, treat_life_lost_as_terminal=False, max_time=5 * 60)
        # Private copy so evaluation never races with the trainers' updates.
        model = copy.deepcopy(shared_model)
        model.eval()
        # Create a new experiment
        vis = visdom.Visdom(env='A3C:' + args.name)
        cc = CrayonClient()
        names = cc.get_experiment_names()
        summaries = []
        # One crayon experiment per evaluation slot, e.g. "name [1]" .. "name [n_eval]";
        # stale experiments with the same name are removed first.
        for idx in range(args.n_eval):
            name = "{} [{}]".format(args.name, idx + 1)
            if name in names:
                cc.remove_experiment(name)
            summaries.append(cc.create_experiment(name))
        max_reward = None  # best single-episode reward seen so far
        save_condition = args.save_intervel  # next step threshold for periodic checkpoints (sic: "intervel")
        rewards = []  # (step, rewards-tuple) records, flushed to disk on checkpoint
        start_time = time.time()
        while True:
            # Sync with the shared model
            model.load_state_dict(shared_model.state_dict())
            restart, eval_start_time, eval_start_step = False, time.time(
            ), counter.value
            results = []
            for i in range(args.n_eval):
                model.reset_state()
                results.append(model_eval(model, env, vis=(vis, i + 1, 60)))
                # If the episode hit the environment's time cap, discard this whole
                # evaluation round and start over.
                if env.exceed_max:
                    restart = True
                    env.reset()
                    break
                env.reset()
            if restart:
                continue
            eval_end_time, eval_end_step = time.time(), counter.value
            # Transpose the per-episode results into field-wise tuples.
            results = EvalResult(*zip(*results))
            rewards.append((counter.value, results.reward))
            local_max_reward = np.max(results.reward)
            if max_reward is None or max_reward < local_max_reward:
                max_reward = local_max_reward
            # NOTE(review): after the update above, this condition is also true when
            # local_max_reward merely equals the running best — so 'best_model.pth'
            # is rewritten on ties as well as on strict improvements.
            if local_max_reward >= max_reward:
                # Save model
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'best_model.pth'))
            # Progress report: elapsed time and an ETA extrapolated from this
            # round's steps-per-second.
            time_since_start = eval_end_time - start_time
            day = time_since_start // SEC_PER_DAY
            time_since_start %= SEC_PER_DAY
            seconds_to_finish = (args.n_steps - eval_end_step) / (
                eval_end_step - eval_start_step) * (eval_end_time - eval_start_time)
            days_to_finish = seconds_to_finish // SEC_PER_DAY
            seconds_to_finish %= SEC_PER_DAY
            print("STEP:[{}|{}], Time: {}d {}, Finish in {}d {}".format(
                counter.value, args.n_steps, '%02d' % day,
                time.strftime("%Hh %Mm %Ss", time.gmtime(time_since_start)),
                '%02d' % days_to_finish,
                time.strftime("%Hh %Mm %Ss", time.gmtime(seconds_to_finish))))
            print(
                '\tMax reward: {}, avg_reward: {}, std_reward: {}, min_reward: {}, max_reward: {}'
                .format(max_reward, np.mean(results.reward),
                        np.std(results.reward), np.min(results.reward),
                        local_max_reward))
            # Plot
            for summary, reward in zip(summaries, results.reward):
                summary.add_scalar_value('reward', reward, step=eval_start_step)
            # Periodic checkpoint: per-iteration snapshot, rolling "latest", and
            # append accumulated reward records to a text log.
            if counter.value > save_condition or counter.value >= args.n_steps:
                save_condition += args.save_intervel
                torch.save(
                    model.state_dict(),
                    os.path.join(args.model_path,
                                 'model_iter_{}.pth'.format(counter.value)))
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'model_latest.pth'))
                with open(os.path.join(args.save_path, 'rewards'), 'a+') as f:
                    for record in rewards:
                        f.write('{}: {}\n'.format(record[0], record[1]))
                del rewards[:]
            if counter.value >= args.n_steps:
                print('Evaluator Finished !!!')
                break
    except KeyboardInterrupt:
        # On Ctrl-C, persist the shared (training) weights before re-raising.
        torch.save(shared_model.state_dict(),
                   os.path.join(args.model_path, 'model_latest.pth'))
        raise
'label': prob }, ignore_index=True) # for proba df_pred['id'].astype(int) return df_pred if __name__ == '__main__': # tensorboad use_tensorboard = False # use_tensorboard = True and CrayonClient is not None if use_tensorboard == True: cc = CrayonClient(hostname='http://192.168.0.3') # cc.remove_all_experiments() trainloader, valloader, trainset, valset, classes, class_to_idx, num_to_class, df = loadDB( args) print('Çlasses {}'.format(classes)) models = ['senet'] for i in range(1, 5): for m in models: runId = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') fixSeed(args) model = selectModel(args, m) recorder = RecorderMeter(args.epochs) # epoc is updated model_name = (type(model).__name__) exp_name = datetime.datetime.now().strftime(model_name + '_' +
def main():
    """Train an LSTM/BN-LSTM classifier on (optionally permuted) sequential MNIST.

    Reads all settings from the module-level ``args``: data path, model type
    ('bnlstm' or 'lstm'), save directory, hidden size, permuted-MNIST flag,
    batch size, max iterations, and GPU flag.  Logs train/valid loss and
    accuracy to a crayon server and checkpoints the model every 50 iterations.
    Uses legacy PyTorch idioms (Variable, volatile, .data[0]).
    """
    data_path = args.data
    model_name = args.model
    save_dir = args.save
    hidden_size = args.hidden_size
    pmnist = args.pmnist  # True -> permuted-MNIST variant
    batch_size = args.batch_size
    max_iter = args.max_iter
    use_gpu = args.gpu

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Pixel ordering: a random permutation for pMNIST, identity otherwise.
    # 784 = 28*28 flattened MNIST pixels.
    if pmnist:
        perm = torch.randperm(784)
    else:
        perm = torch.arange(0, 784).long()

    # Images are flattened then permuted so each sample becomes a length-784 sequence.
    train_dataset = datasets.MNIST(root=data_path, train=True,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(), transform_flatten,
                                       partial(transform_permute, perm=perm)
                                   ]),
                                   download=True)
    valid_dataset = datasets.MNIST(root=data_path, train=False,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(), transform_flatten,
                                       partial(transform_permute, perm=perm)
                                   ]),
                                   download=True)

    # Crayon/TensorBoard: one timestamped experiment each for train and valid curves.
    tb_client = CrayonClient()
    tb_xp_name = '{}-{}'.format(datetime.now().strftime("%y%m%d-%H%M%S"),
                                save_dir)
    tb_xp_train = tb_client.create_experiment('{}/train'.format(tb_xp_name))
    tb_xp_valid = tb_client.create_experiment('{}/valid'.format(tb_xp_name))

    # max_length is only needed by the batch-normalized cell (per-timestep BN stats).
    if model_name == 'bnlstm':
        model = LSTM(cell_class=BNLSTMCell, input_size=1,
                     hidden_size=hidden_size, batch_first=True,
                     max_length=784)
    elif model_name == 'lstm':
        model = LSTM(cell_class=LSTMCell, input_size=1,
                     hidden_size=hidden_size, batch_first=True)
    else:
        raise ValueError
    # Final classifier head: last hidden state -> 10 digit classes.
    fc = nn.Linear(in_features=hidden_size, out_features=10)
    loss_fn = nn.CrossEntropyLoss()
    params = list(model.parameters()) + list(fc.parameters())
    optimizer = optim.RMSprop(params=params, lr=1e-3, momentum=0.9)

    def compute_loss_accuracy(data, label):
        # For plain MNIST, start from a small random (h0, c0); for pMNIST the
        # model's default initial state is used (hx=None).
        hx = None
        if not pmnist:
            h0 = Variable(
                data.data.new(1, data.size(0), hidden_size).normal_(0, 0.1))
            c0 = Variable(
                data.data.new(1, data.size(0), hidden_size).normal_(0, 0.1))
            hx = (h0, c0)
        _, (h_n, _) = model(input_=data, hx=hx)
        logits = fc(h_n[0])
        loss = loss_fn(input=logits, target=label)
        accuracy = (logits.max(1)[1] == label).float().mean()
        return loss, accuracy

    if use_gpu:
        model.cuda()
        fc.cuda()

    iter_cnt = 0
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              pin_memory=True)
    while iter_cnt < max_iter:
        # Re-created each epoch so shuffling reshuffles the training order.
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  pin_memory=True)
        for train_batch in train_loader:
            train_data, train_label = train_batch
            train_data = Variable(train_data)
            train_label = Variable(train_label)
            if use_gpu:
                train_data = train_data.cuda()
                train_label = train_label.cuda()
            model.train(True)
            model.zero_grad()
            train_loss, train_accuracy = compute_loss_accuracy(
                data=train_data, label=train_label)
            train_loss.backward()
            # NOTE(review): clip_grad_norm (no trailing underscore) is the
            # pre-0.4 PyTorch API; newer code uses clip_grad_norm_.
            clip_grad_norm(parameters=params, max_norm=1)
            optimizer.step()
            tb_xp_train.add_scalar_dict(data={
                'loss': train_loss.data[0],
                'accuracy': train_accuracy.data[0]
            }, step=iter_cnt)
            # Every 50 iterations: evaluate on ONE shuffled validation batch
            # and checkpoint the whole model.
            if iter_cnt % 50 == 49:
                for valid_batch in valid_loader:
                    valid_data, valid_label = valid_batch
                    # Dirty, but don't get other solutions
                    break
                valid_data = Variable(valid_data, volatile=True)
                valid_label = Variable(valid_label, volatile=True)
                if use_gpu:
                    valid_data = valid_data.cuda()
                    valid_label = valid_label.cuda()
                model.train(False)
                valid_loss, valid_accuracy = compute_loss_accuracy(
                    data=valid_data, label=valid_label)
                tb_xp_valid.add_scalar_dict(data={
                    'loss': valid_loss.data[0],
                    'accuracy': valid_accuracy.data[0]
                }, step=iter_cnt)
                save_path = '{}/{}'.format(save_dir, iter_cnt)
                torch.save(model, save_path)
            iter_cnt += 1
            if iter_cnt == max_iter:
                break
from pycrayon import CrayonClient
import time

# Open a connection to the crayon server running on this machine.
client = CrayonClient(hostname="127.0.0.1")

# Register a brand-new experiment on the server.
experiment = client.create_experiment("foo")

# Log a couple of scalar samples, supplying the wall-clock time explicitly.
experiment.add_scalar_value("accuracy", 0, wall_time=11.3)
experiment.add_scalar_value("accuracy", 4, wall_time=12.3)
# The step index can be forced alongside the wall time.
experiment.add_scalar_value("accuracy", 6, wall_time=13.3, step=4)

# Read back everything logged under "accuracy":
# >> [[11.3, 0, 0.0], [12.3, 1, 4.0], [13.3, 4, 6.0]])
experiment.get_scalar_values("accuracy")

# Snapshot this experiment into a zip archive on disk...
backup_path = experiment.to_zip()

# ...then drop it from the server; the old handle becomes unusable afterwards.
client.remove_experiment("foo")

# Rebuild the logged data under a different experiment name from the backup.
restored = client.create_experiment("bar", zip_file=backup_path)

# List the names of every scalar plot the restored experiment contains.
restored.get_scalar_names()
if torch.cuda.is_available() and not opt.gpuid: print("WARNING: You have a CUDA device, should run with -gpuid 0") if opt.gpuid: cuda.set_device(opt.gpuid[0]) if opt.seed > 0: torch.cuda.manual_seed(opt.seed) if len(opt.gpuid) > 1: sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n") sys.exit(1) # Set up the Crayon logging server. if opt.exp_host != "": from pycrayon import CrayonClient cc = CrayonClient(hostname=opt.exp_host) experiments = cc.get_experiment_names() print(experiments) if opt.exp in experiments: cc.remove_experiment(opt.exp) experiment = cc.create_experiment(opt.exp) def report_func(epoch, batch, num_batches, start_time, lr, report_stats): """ This is the user-defined batch-level traing progress report function. Args: epoch(int): current epoch count.
def main():
    """Train the MCNN crowd-counting model on the WorldExpo dataset.

    Command-line flags: ``--preload`` (preload frames into memory) and
    ``--data`` (dataset root).  Trains for ``end_step`` epochs with Adam,
    evaluating (MAE/MSE) and checkpointing every second epoch, optionally
    mirroring metrics to a crayon/TensorBoard server.  Requires CUDA.
    """
    parser = argparse.ArgumentParser(description='mcnn worldexp.')
    parser.add_argument('--preload', type=int, default=1)
    parser.add_argument('--data', type=str,
                        default="/mnt/m2/mzcc/crowd_data/worldexpo",
                        help='train, test, etc')
    args = parser.parse_args()

    method = 'mcnn'
    dataset_name = 'worldexpo'
    output_dir = './saved_models/'
    data_path = args.data
    # Dataset layout: frames, density maps, and region-of-interest masks,
    # split into train/test directories under the data root.
    train_path = data_path+'/train_frame'
    train_gt_path = data_path+'/train_dmap'
    train_mask_path = os.path.join(data_path,'train_roi')
    val_path = data_path+'/test_frame'
    val_gt_path = data_path+'/test_dmap'
    val_mask_path = os.path.join(data_path, 'test_roi')

    #training configuration
    start_step = 0
    end_step = 3000
    lr = 0.000001
    momentum = 0.9  # NOTE(review): unused — the Adam optimizer below ignores it
    disp_interval = 500
    log_interval = 250  # NOTE(review): unused in this function

    #Tensorboard config
    use_tensorboard = False
    save_exp_name = method + '_' + dataset_name + '_' + 'v1'
    remove_all_log = False  # remove all historical experiments in TensorBoard
    exp_name = None  # the previous experiment name in TensorBoard
    # ------------
    # Fixed seed for reproducibility across numpy and torch (CPU + CUDA).
    rand_seed = 64678
    if rand_seed is not None:
        np.random.seed(rand_seed)
        torch.manual_seed(rand_seed)
        torch.cuda.manual_seed(rand_seed)

    # load net
    net = CrowdCounter()
    network.weights_normal_init(net, dev=0.01)
    # network.weights_xavier_init(net, gain=0.01)
    net.cuda()
    net.train()
    params = list(net.parameters())
    # Only optimize parameters that require gradients (frozen layers excluded).
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, net.parameters()), lr=lr)

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # tensorboad
    use_tensorboard = use_tensorboard and CrayonClient is not None
    if use_tensorboard:
        cc = CrayonClient(hostname='127.0.0.1')
        if remove_all_log:
            cc.remove_all_experiments()
        if exp_name is None:
            exp_name = save_exp_name
            exp = cc.create_experiment(exp_name)
        else:
            # Resume logging into a previously created experiment.
            exp = cc.open_experiment(exp_name)

    # training
    train_loss = 0
    step_cnt = 0  # cumulative step counter used for the FPS estimate
    re_cnt = False  # set after a display step to restart the timer
    t = Timer()
    t.tic()

    data_loader = ExrImageDataLoader(train_path, train_gt_path,
                                     mask_path=train_mask_path,
                                     shuffle=True, gt_downsample=True,
                                     pre_load=args.preload)
    data_loader_val = ExrImageDataLoader(val_path, val_gt_path,
                                         mask_path=val_mask_path,
                                         shuffle=False, gt_downsample=True,
                                         pre_load=False)
    best_mae = 10000000  # sentinel: any real MAE on the first eval beats this

    for epoch in range(start_step, end_step+1):
        step = -1
        train_loss = 0
        for blob in data_loader:
            step = step + 1
            im_data = blob['data']
            gt_data = blob['gt_density']
            mask = blob['mask']
            # Forward pass; the network stores its own loss (net.loss) when
            # given ground truth.
            density_map = net(im_data, gt_data, mask=mask)
            loss = net.loss
            train_loss += loss.item()#.data[0]
            step_cnt += 1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % disp_interval == 0:
                print("current loss: {}".format(loss.item()))
                duration = t.toc(average=False)
                fps = step_cnt / duration
                # Compare total ground-truth count vs. estimated count for this batch.
                gt_count = np.sum(gt_data)
                density_map = density_map.data.cpu().numpy()
                et_count = np.sum(density_map)
                utils.save_results(im_data,gt_data,density_map, output_dir)
                log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (
                    epoch, step, 1./fps, gt_count,et_count)
                log_print(log_text, color='green', attrs=['bold'])
                re_cnt = True

            if re_cnt:
                t.tic()
                re_cnt = False

        # Every second epoch: checkpoint, then validate the saved snapshot.
        if (epoch % 2 == 0):
            save_name = os.path.join(
                output_dir, '{}_{}_{}.h5'.format(method,dataset_name,epoch))
            network.save_net(save_name, net)
            #calculate error on the validation dataset
            mae,mse = evaluate_model(save_name, data_loader_val)
            # NOTE(review): best_mse/best_model are only assigned inside this
            # branch; they are defined before first use because best_mae's
            # sentinel guarantees the branch is taken on the first evaluation.
            if mae < best_mae:
                best_mae = mae
                best_mse = mse
                best_model = '{}_{}_{}.h5'.format(method,dataset_name,epoch)
            log_text = 'EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (epoch,mae,mse)
            log_print(log_text, color='green', attrs=['bold'])
            log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (
                best_mae,best_mse, best_model)
            log_print(log_text, color='green', attrs=['bold'])
            if use_tensorboard:
                exp.add_scalar_value('MAE', mae, step=epoch)
                exp.add_scalar_value('MSE', mse, step=epoch)
                exp.add_scalar_value('train_loss',
                                     train_loss/data_loader.get_num_samples(),
                                     step=epoch)
if opt.seed > 0: torch.manual_seed(opt.seed) if torch.cuda.is_available() and not opt.gpus: print("WARNING: You have a CUDA device, should run with -gpus 0") opt.gpus = range(opt.gpus) if opt.gpus: #cuda.set_device(opt.gpus) if opt.seed > 0: torch.cuda.manual_seed(opt.seed) # Set up the Crayon logging server. if opt.log_server != "": from pycrayon import CrayonClient cc = CrayonClient(hostname=opt.log_server) experiments = cc.get_experiment_names() print(experiments) if opt.experiment_name in experiments: cc.remove_experiment(opt.experiment_name) experiment = cc.create_experiment(opt.experiment_name) def eval(model, criterion, data, fert_dict): stats = onmt.Loss.Statistics() model.eval() loss = onmt.Loss.MemoryEfficientLoss(opt, model.generator, criterion, eval=True,
print("WARNING: You have a CUDA device, should run with -gpuid 0") if opt.gpuid: cuda.set_device(opt.gpuid[0]) if opt.seed > 0: torch.cuda.manual_seed(opt.seed) if len(opt.gpuid) > 1: sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n") sys.exit(1) # Set up the Crayon logging server. if opt.exp_host != "": from pycrayon import CrayonClient cc = CrayonClient(hostname=opt.exp_host) experiments = cc.get_experiment_names() print(experiments) if opt.exp in experiments: cc.remove_experiment(opt.exp) experiment = cc.create_experiment(opt.exp) if opt.tensorboard: from tensorboardX import SummaryWriter writer = SummaryWriter( opt.tensorboard_log_dir + datetime.now().strftime("/%b-%d_%H-%M-%S"), comment="Onmt") progress_step = 0