class Tensorboard(Callback):
    def __init__(self, experiment_name: str, hostname=QB_TB_HOSTNAME, port=QB_TB_PORT):
        if host_is_up(hostname, port):
            from pycrayon import CrayonClient
            self.client = CrayonClient(hostname=hostname, port=port)
            self.experiment_name = experiment_name
            try:
                self.client.remove_experiment(experiment_name)
            except ValueError:
                pass
            self.experiment = self.client.create_experiment(experiment_name)
        else:
            log.info(
                f'Tensorboard not found on http://{hostname}:{port}, experiment logging disabled'
            )
            self.client = None
            self.experiment_name = None
            self.experiment = None

    def on_epoch_end(self, logs):
        if self.client is not None:
            self.experiment.add_scalar_value('train_loss', logs['train_loss'][-1])
            self.experiment.add_scalar_value('train_acc', logs['train_acc'][-1])
            self.experiment.add_scalar_value('test_loss', logs['test_loss'][-1])
            self.experiment.add_scalar_value('test_acc', logs['test_acc'][-1])
            self.experiment.add_scalar_value('train_time', logs['train_time'][-1])
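# The Tensorboard callback above gates everything on a host_is_up() helper
# that is not shown in the snippet. A minimal sketch of such a probe,
# assuming a plain TCP connectivity check is all that is required (the name
# and signature come from the call site; the body is illustrative, not the
# original implementation):
import socket

def host_is_up(hostname, port, timeout=1.0):
    # True if a TCP connection to hostname:port can be established
    try:
        with socket.create_connection((hostname, int(port)), timeout=timeout):
            return True
    except OSError:
        return False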
def test_remove_experiment(self):
    cc = CrayonClient(port=self.test_server_port)
    self.assertRaises(ValueError, cc.open_experiment, "foo")
    foo = cc.create_experiment("foo")
    foo.add_scalar_value("bar", 1, step=2, wall_time=0)
    self.assertRaises(ValueError, cc.create_experiment, "foo")
    cc.open_experiment("foo")
    cc.remove_experiment(foo.xp_name)
    self.assertRaises(ValueError, cc.remove_experiment, foo.xp_name)
    foo = cc.create_experiment("foo")
def tensorboard():
    '''Connect to the Crayon server and return a fresh experiment.'''
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    # remove_experiment raises ValueError if the experiment does not exist yet
    try:
        cc.remove_experiment(EXP_NAME)
    except ValueError:
        pass
    foo = cc.create_experiment(EXP_NAME)
def setup_tensorboard(exp_id, cur_t, hostname, port):
    exp_filename = '{}_{}'.format(cur_t, exp_id)
    tb = CrayonClient(hostname=hostname, port=port)
    try:
        tb_experiment = tb.create_experiment(exp_filename)
    except ValueError:
        # experiment already exists: flush the data anew
        tb.remove_experiment(exp_filename)
        tb_experiment = tb.create_experiment(exp_filename)
    return tb_experiment, tb
def parse_args():
    parser = argparse.ArgumentParser(
        description='umt.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    opts.add_md_help_argument(parser)
    opts.model_opts(parser)
    opts.preprocess_opts(parser)
    opts.train_opts(parser)
    opt = parser.parse_args()

    if opt.word_vec_size != -1:
        opt.src_word_vec_size = opt.word_vec_size
        opt.tgt_word_vec_size = opt.word_vec_size
    if opt.layers != -1:
        opt.enc_layers = opt.layers
        opt.dec_layers = opt.layers
    opt.brnn = (opt.encoder_type == "brnn")

    # Seed the Python and PyTorch RNGs once
    random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    if torch.cuda.is_available() and not opt.gpuid:
        print("WARNING: You have a CUDA device, should run with -gpuid 0")
    if opt.gpuid:
        cuda.set_device(opt.gpuid[0])
        if opt.seed > 0:
            torch.cuda.manual_seed(opt.seed)
    if len(opt.gpuid) > 1:
        sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n")
        sys.exit(1)

    # Set up the Crayon logging server.
    if opt.exp_host != "":
        from pycrayon import CrayonClient
        cc = CrayonClient(hostname=opt.exp_host)
        experiments = cc.get_experiment_names()
        print(experiments)
        if opt.exp in experiments:
            cc.remove_experiment(opt.exp)
    return opt
class Tensorboard(Callback):
    def __init__(self, experiment_name: str, log_dir=None):
        from pycrayon import CrayonClient
        self.client = CrayonClient(port=6007)
        self.experiment_name = experiment_name
        try:
            self.client.remove_experiment(experiment_name)
        except ValueError:
            pass
        self.experiment = self.client.create_experiment(experiment_name)

    def on_epoch_end(self, logs):
        self.experiment.add_scalar_value('train_loss', logs['train_loss'][-1])
        self.experiment.add_scalar_value('train_acc', logs['train_acc'][-1])
        self.experiment.add_scalar_value('test_loss', logs['test_loss'][-1])
        self.experiment.add_scalar_value('test_acc', logs['test_acc'][-1])
        self.experiment.add_scalar_value('train_time', logs['train_time'][-1])
def create_crayon_logger(exp_name, port=8889):
    """Connect to a local Crayon server and return a fresh experiment."""
    # Connect Crayon Logger (TensorBoard "wrapper") to the server
    cc = CrayonClient(hostname="localhost", port=port)
    tb_log_exp_name = exp_name
    # Remove previous experiment
    try:
        cc.remove_experiment(tb_log_exp_name)
    except ValueError:
        # experiment doesn't already exist - nothing to be done here
        print("Experiment '{}' didn't exist already (nothing to be done).".format(
            tb_log_exp_name))
    # Create a new experiment
    tb_log = cc.create_experiment(tb_log_exp_name)
    return tb_log
class Monitor(object):
    def __init__(self, address, port):
        self.cc = CrayonClient(hostname=address, port=port)

    def start_experiment(self, name, clean=True):
        exps = self.cc.get_experiment_names()
        if name in exps:
            if clean:
                self.cc.remove_experiment(name)
                self.exp = self.cc.create_experiment(name)
                print('clean and create a new one')
            else:
                self.exp = self.cc.open_experiment(name)
        else:
            self.exp = self.cc.create_experiment(name)

    def push(self, data, wall_time=-1, step=-1):
        self.exp.add_scalar_dict(data, wall_time, step)
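# A short usage sketch for the Monitor wrapper above, assuming a Crayon
# server is reachable on localhost:8889 (hostname, port, experiment and
# metric names here are illustrative, not from the original source):
monitor = Monitor('localhost', 8889)
monitor.start_experiment('demo_run', clean=True)   # drops any stale run of the same name
monitor.push({'loss': 0.42, 'acc': 0.91}, step=1)  # add_scalar_dict logs several scalars at once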
def test_backup(self):
    cc = CrayonClient(port=self.test_server_port)
    foo = cc.create_experiment("foo")
    foo.add_scalar_value("bar", 2, wall_time=time.time(), step=1)
    foo.add_scalar_value("bar", 2, wall_time=time.time(), step=2)
    foo_data = foo.get_scalar_values("bar")
    filename = foo.to_zip()
    cc.remove_experiment("foo")
    foo = cc.create_experiment("foo", zip_file=filename)
    new_data = foo.get_scalar_values("bar")
    self.assertEqual(foo_data, new_data)
    new = cc.create_experiment("new", zip_file=filename)
    new_data = new.get_scalar_values("bar")
    self.assertEqual(foo_data, new_data)
    os.remove(filename)
def get_crayon_experiment(exp_name, hostname='127.0.0.1', overwrite=True):
    cc = CrayonClient(hostname=hostname)
    cc_exp = None
    experiments = cc.get_experiment_names()
    if exp_name in experiments:
        if overwrite:
            cc.remove_experiment(exp_name)
            cc_exp = cc.create_experiment(exp_name)
        else:
            cc_exp = cc.open_experiment(exp_name)
    else:
        try:
            cc_exp = cc.create_experiment(exp_name)
        except ValueError:
            cc.remove_experiment(exp_name)
            cc_exp = cc.create_experiment(exp_name)
    return cc_exp
def make_crayon_experiments(experiment_name, new=True):
    client = CrayonClient(hostname=config.CRAYON_SERVER_HOSTNAME)
    train_experiment_name = f'{experiment_name}_train'
    valid_experiment_name = f'{experiment_name}_valid'
    if new:
        try:
            client.remove_experiment(train_experiment_name)
        except ValueError:
            pass
        try:
            client.remove_experiment(valid_experiment_name)
        except ValueError:
            pass
        train_experiment = client.create_experiment(train_experiment_name)
        train_experiment.scalar_steps['lr'] = 1
        valid_experiment = client.create_experiment(valid_experiment_name)
    else:
        train_experiment = client.open_experiment(train_experiment_name)
        valid_experiment = client.open_experiment(valid_experiment_name)
    return train_experiment, valid_experiment
def crayon_create_experiment(exp_name: str,
                             cclient: CrayonClient,
                             overwrite: bool = True) -> CrayonExperiment:
    """
    Create experiment name in the alband/crayon tensorboard.

    :param exp_name: name of experiment
    :param cclient: handler of requests to crayon
    :param overwrite: if the experiment already exists, delete and recreate
    :return: the created experiment
    """
    try:
        ccexp = cclient.create_experiment(exp_name)
        return ccexp
    except ValueError as verr:
        if overwrite:
            cclient.remove_experiment(exp_name)
            ccexp = cclient.create_experiment(exp_name)
            return ccexp
        else:
            raise verr
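# Example call for crayon_create_experiment, assuming pycrayon is installed
# and a Crayon server is running on localhost with its default port (the
# hostname and experiment name are illustrative):
from pycrayon import CrayonClient

client = CrayonClient(hostname="localhost")
exp = crayon_create_experiment("my_run", client, overwrite=True)
exp.add_scalar_value("loss", 0.5, step=1)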
def main():
    # Set up the Crayon logging server.
    if opt.log_server != "":
        from pycrayon import CrayonClient
        cc = CrayonClient(hostname=opt.log_server)
        experiments = cc.get_experiment_names()
        print(experiments)
        if opt.experiment_name in experiments:
            cc.remove_experiment(opt.experiment_name)
        opt.experiment_name = cc.create_experiment(opt.experiment_name)

    print("Loading data from '%s'" % opt.data)
    dataset = torch.load(opt.data)
    dict_checkpoint = (opt.train_from if opt.train_from
                       else opt.train_from_state_dict)
    if dict_checkpoint:
        print('Loading dicts from checkpoint at %s' % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint,
                                map_location=lambda storage, loc: storage)
        # dataset['dicts'] = checkpoint['dicts']

    if opt.redis:
        trainData = onmt.RedisDataset("train", opt.batch_size, False,
                                      reverse=opt.reverse, port=opt.port,
                                      db=opt.db, r2l=opt.r2l)
        validData = onmt.RedisDataset('valid', opt.batch_size, False,
                                      volatile=True, reverse=opt.reverse,
                                      port=opt.port, r2l=opt.r2l, db=opt.db)
    else:
        trainData = onmt.Dataset(dataset['train']['src'], dataset['train']['tgt'],
                                 opt.batch_size, False,
                                 data_type=dataset.get("type", "text"),
                                 srcFeatures=dataset['train'].get('src_features'),
                                 tgtFeatures=dataset['train'].get('tgt_features'),
                                 alignment=dataset['train'].get('alignments'))
        validData = onmt.Dataset(dataset['valid']['src'], dataset['valid']['tgt'],
                                 opt.batch_size, False, volatile=True,
                                 data_type=dataset.get("type", "text"),
                                 srcFeatures=dataset['valid'].get('src_features'),
                                 tgtFeatures=dataset['valid'].get('tgt_features'),
                                 alignment=dataset['valid'].get('alignments'))

    dicts = dataset['dicts']
    if opt.reverse:
        dicts['src'], dicts['tgt'] = dicts['tgt'], dicts['src']
        dicts['src_features'], dicts['tgt_features'] = \
            dicts['tgt_features'], dicts['src_features']
    print(' * vocabulary size. source = %d; target = %d' %
          (dicts['src'].size(), dicts['tgt'].size()))
    # if 'src_features' in dicts:
    #     for j in range(len(dicts['src_features'])):
    #         print(' * src feature %d size = %d' %
    #               (j, dicts['src_features'][j].size()))
    # print(' * number of training sentences. %d' %
    #       len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Building model...')
    if opt.encoder_type == "text":
        encoder = onmt.Models.Encoder(opt, dicts['src'],
                                      dicts.get('src_features', None))
    elif opt.encoder_type == "img":
        encoder = onmt.modules.ImageEncoder(opt)
        assert "type" not in dataset or dataset["type"] == "img"
    else:
        print("Unsupported encoder type %s" % (opt.encoder_type))

    decoder = onmt.Models.Decoder(opt, dicts['tgt'])

    if opt.copy_attn:
        generator = onmt.modules.CopyGenerator(opt, dicts['src'], dicts['tgt'])
    else:
        generator = nn.Sequential(
            nn.Linear(opt.rnn_size, dicts['tgt'].size()),
            nn.LogSoftmax())
        if opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    model = onmt.Models.NMTModel(encoder, decoder, len(opt.gpus) > 1)

    if opt.train_from:
        print('Loading model from checkpoint at %s' % opt.train_from)
        chk_model = checkpoint['model']
        generator_state_dict = chk_model.generator.state_dict()
        model_state_dict = {k: v for k, v in chk_model.state_dict().items()
                            if 'generator' not in k}
        model.load_state_dict(model_state_dict)
        generator.load_state_dict(generator_state_dict)
        opt.start_epoch = checkpoint['epoch'] + 1

    if opt.train_from_state_dict:
        print('Loading model from checkpoint at %s' % opt.train_from_state_dict)
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        opt.start_epoch = checkpoint['epoch'] + 1

    model.cpu()
    generator.cpu()
    model.generator = generator

    if not opt.train_from_state_dict and not opt.train_from:
        if opt.param_init != 0.0:
            print('Initializing params')
            for p in model.parameters():
                p.data.uniform_(-opt.param_init, opt.param_init)
        encoder.embeddings.load_pretrained_vectors(opt.pre_word_vecs_enc)
        decoder.embeddings.load_pretrained_vectors(opt.pre_word_vecs_dec)
        optim = onmt.Optim(
            opt.optim, opt.learning_rate, opt.max_grad_norm,
            lr_decay=opt.learning_rate_decay,
            start_decay_at=opt.start_decay_at,
            opt=opt)
    else:
        print('Loading optimizer from checkpoint:')
        optim = checkpoint['optim']
        print(optim)

    if opt.train_from or opt.train_from_state_dict:
        optim.optimizer.load_state_dict(
            checkpoint['optim'].optimizer.state_dict())

    print('Multi gpu training ', opt.gpus)
    trainer = MultiprocessingTrainer(opt, model, optim, device_ids=opt.gpus)

    nParams = sum([p.nelement() for p in model.parameters()])
    print('* number of parameters: %d' % nParams)
    enc = 0
    dec = 0
    for name, param in model.named_parameters():
        if 'encoder' in name:
            enc += param.nelement()
        elif 'decoder' in name:
            dec += param.nelement()
        else:
            print(name, param.nelement())
    print('encoder: ', enc)
    print('decoder: ', dec)

    trainModel(trainer, trainData, validData, dataset)
class DDPGOptimizer(object):
    """docstring for DDPGOptimizer"""

    def __init__(self, agent, capacity, batch_size, gamma, tau, init_lr,
                 weight_decay, crayon_vis):
        super(DDPGOptimizer, self).__init__()
        self.agent = agent
        self.gamma = gamma
        self.tau = tau
        self.memory = ReplayMemory(capacity, batch_size)
        self.critic_criterion = nn.MSELoss()
        self.critic_optimizer = optim.Adam(self.agent.critic.parameters(),
                                           lr=init_lr['critic'],
                                           weight_decay=weight_decay)
        self.actor_optimizer = optim.Adam(self.agent.actor.parameters(),
                                          lr=init_lr['actor'])
        self.crayon_vis = crayon_vis
        if self.crayon_vis:
            self.cc = CrayonClient()
            try:
                self.stats = self.cc.create_experiment('stats')
            except ValueError:
                # experiment already exists: recreate it from scratch
                self.cc.remove_experiment('stats')
                self.stats = self.cc.create_experiment('stats')

    def step(self):
        samples = self.memory.sample()
        states, actions, rewards, next_states = map(
            lambda x: np.asarray(x), zip(*samples))

        # Update critic network
        self.critic_optimizer.zero_grad()
        outputs = self.agent.critic(Variable(torch.from_numpy(states)),
                                    Variable(torch.from_numpy(actions)))
        no_final_states = np.array(
            [ns for ns in next_states if ns is not None])
        no_final_targets = self.agent.critic_target(
            Variable(torch.from_numpy(no_final_states), volatile=True),
            self.agent.actor_target(
                Variable(torch.from_numpy(no_final_states), volatile=True)))
        targets = Variable(torch.zeros(self.memory.batch_size, 1))
        mask = Variable(torch.ByteTensor(
            [ns is not None for ns in next_states]).view(-1, 1))
        targets.masked_copy_(mask, no_final_targets)
        targets = self.gamma * targets + \
            Variable(torch.from_numpy(rewards).unsqueeze(1))
        targets = targets.detach()
        loss = self.critic_criterion(outputs, targets)
        if self.crayon_vis:
            self.stats.add_scalar_value('critic loss', loss.data[0])
        loss.backward()
        # critic_visualizer = pytorch_net_visualizer(loss)
        # critic_visualizer.view()
        # input("Visualizing critic networks...")
        '''
        # gradient clamping in case of gradient explosion
        for param in self.agent.critic.parameters():
            param.grad.data.clamp_(-1, 1)
        '''
        self.critic_optimizer.step()

        # Update actor network
        self.critic_optimizer.zero_grad()
        self.actor_optimizer.zero_grad()
        outputs = self.agent.critic(
            Variable(torch.from_numpy(states), requires_grad=False),
            self.agent.actor(Variable(torch.from_numpy(states),
                                      requires_grad=True)))
        # negation, since we want the policy to increase the likelihood of
        # high-reward trajectories
        outputs = -torch.mean(outputs)
        outputs.backward()
        # actor_visualizer = pytorch_net_visualizer(outputs)
        # actor_visualizer.view()
        # input("Visualizing actor networks...")
        self.actor_optimizer.step()

        # Soft-update the target networks in place (plain assignment to the
        # loop variable would rebind the name and leave the parameters unchanged)
        for param, param_target in zip(self.agent.critic.parameters(),
                                       self.agent.critic_target.parameters()):
            param_target.data.copy_(
                self.tau * param.data + (1 - self.tau) * param_target.data)
        for param, param_target in zip(self.agent.actor.parameters(),
                                       self.agent.actor_target.parameters()):
            param_target.data.copy_(
                self.tau * param.data + (1 - self.tau) * param_target.data)

        return loss.data[0]
val_loader = LoaderFactory.create_dataloader(LoaderMode.VAL, args,
                                             do_use_gpu=args.cuda)
if args.do_test:
    test_loader = LoaderFactory.create_dataloader(LoaderMode.TEST, args,
                                                  do_use_gpu=args.cuda)

#%% Create logger
if args.do_train:
    # Connect Crayon Logger (TensorBoard "wrapper") to the server
    cc = CrayonClient(hostname="localhost", port=8889)
    tb_log_exp_name = args.exp_name
    # Remove previous experiment
    try:
        cc.remove_experiment(tb_log_exp_name)
    except ValueError:
        # experiment doesn't already exist - nothing to be done here
        print("Experiment '{}' didn't exist already (nothing to be done).".format(
            tb_log_exp_name))
    # Create a new experiment
    tb_log = cc.create_experiment(tb_log_exp_name)

#%% Train (Load) model
# Create and init predictor
model = NetFactory.create_net(net_type=args.net_type,
                              params=args,
                              num_prior_dims=args.num_embedding_dims,
                              num_cond_dims=args.num_cond_dims,
                              num_joints=args.num_joints,
                              num_features=args.num_features)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from RAN import restorator, discirminator
from patch_wise import patch
from pycrayon import CrayonClient
import time

cc = CrayonClient(hostname="localhost", port=8889)
# Remove stale experiments before recreating them (remove_experiment raises
# ValueError if an experiment does not exist yet)
for name in ('d_real_error', 'd_fake_error', 'g_error'):
    try:
        cc.remove_experiment(name)
    except ValueError:
        pass
d_real_errorC = cc.create_experiment('d_real_error')
d_fake_errorC = cc.create_experiment('d_fake_error')
g_errorC = cc.create_experiment('g_error')


def extract(v):
    return v.data.storage().tolist()


print('Starting my Restoration Adversarial Net...')
torch.manual_seed(123)
torch.cuda.manual_seed(123)
patchSize = 64
patches = patch()
print('lr', lookup_lr(cfg, start_epoch))
print('-------------------------------')

# tensorboard
use_tensorboard = cfg.use_tensorboard and CrayonClient is not None
use_tensorboard = False  # hard-disabled here regardless of the config
remove_all_log = True
if use_tensorboard:
    cc = CrayonClient(hostname='127.0.0.1')
    if remove_all_log:
        print('remove all experiments')
        cc.remove_all_experiments()
    if start_epoch == 0:
        try:
            cc.remove_experiment(cfg.exp_name)
        except ValueError:
            pass
        exp = cc.create_experiment(cfg.exp_name)
    else:
        exp = cc.open_experiment(cfg.exp_name)

train_loss = 0
bbox_loss, iou_loss, cls_loss = 0., 0., 0.
cnt = 0
timer = Timer()

# default input size
network_size = cfg.inp_size
print("WARNING: You have a CUDA device, should run with -gpus 0") if opt.gpus: cuda.set_device(opt.gpus[0]) if opt.seed > 0: torch.cuda.manual_seed(opt.seed) # Set up the Crayon logging server. if opt.log_server != "": from pycrayon import CrayonClient cc = CrayonClient(hostname=opt.log_server) experiments = cc.get_experiment_names() print(experiments) if opt.experiment_name in experiments: cc.remove_experiment(opt.experiment_name) experiment = cc.create_experiment(opt.experiment_name) def eval(model, criterion, data): stats = onmt.Loss.Statistics() model.eval() loss = onmt.Loss.MemoryEfficientLoss(opt, model.generator, criterion, eval=True, copy_loss=opt.copy_attn) for i in range(len(data)): batch = data[i] outputs, attn, dec_hidden = model(batch.src, batch.tgt, batch.lengths) batch_stats, _, _ = loss.loss(batch, outputs, attn)
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from pycrayon import CrayonClient
import time

cc = CrayonClient(hostname="10.150.6.120")
# remove_experiment raises ValueError if the experiment does not exist yet
try:
    cc.remove_experiment("OMIE_2")
except ValueError:
    pass
OMIE = cc.create_experiment("OMIE_2")

###
### noise level one
### dimension 2
### z is sampled separately
###
input_size = 2
hidden_size = 5
hidden_size_ = 3
num_classes = 1
num_epochs = 9
learning_rate = 0.00001
debug_mode = True
# Create a new experiment
foo = cc.create_experiment("foo")

# Send some scalar values to the server
foo.add_scalar_value("accuracy", 0, wall_time=11.3)
foo.add_scalar_value("accuracy", 4, wall_time=12.3)
# You can force the time and step values
foo.add_scalar_value("accuracy", 6, wall_time=13.3, step=4)

# Get the data sent to the server
foo.get_scalar_values("accuracy")
# >> [[11.3, 0, 0.0], [12.3, 1, 4.0], [13.3, 4, 6.0]]

# Back up this experiment as a zip file
filename = foo.to_zip()

# Delete this experiment from the server
cc.remove_experiment("foo")
# using the `foo` object from now on will result in an error

# Create a new experiment based on foo's backup
bar = cc.create_experiment("bar", zip_file=filename)

# Get the name of all scalar plots in this experiment
bar.get_scalar_names()
# >> ["accuracy"]

# Get the data for this experiment
bar.get_scalar_values("accuracy")
# >> [[11.3, 0, 0.0], [12.3, 1, 4.0], [13.3, 4, 6.0]]

cc.remove_experiment("bar")
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
import os
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from fine_tuning_config_file import *

## If you want to keep a track of your network on tensorboard, set USE_TENSORBOARD TO 1 in config file.
if USE_TENSORBOARD:
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    try:
        cc.remove_experiment(EXP_NAME)
    except ValueError:
        # experiment does not exist yet
        pass
    foo = cc.create_experiment(EXP_NAME)

## If you want to use the GPU, set GPU_MODE TO 1 in config file
use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

count = 0

### SECTION 2 - data loading and shuffling/augmentation/normalization : all handled by torch automatically.
class CrayonWrapper:
    """
    Wraps PyCrayon (https://github.com/torrvision/crayon), a language-agnostic
    interface to TensorBoard.
    """

    def __init__(self, name, runs_distributed, runs_cluster, chief_handle,
                 path_log_storage=None, crayon_server_address="localhost"):
        self._name = name
        self._path_log_storage = path_log_storage
        if path_log_storage is not None:
            create_dir_if_not_exist(path_log_storage)
        self._chief_handle = chief_handle
        self._crayon = CrayonClient(hostname=crayon_server_address)
        self._experiments = {}
        self.clear()
        # dict of exps containing dict of graph names containing lists of {step: val, } dicts
        self._custom_logs = {}
        self._ray = MaybeRay(runs_distributed=runs_distributed,
                             runs_cluster=runs_cluster)

    @property
    def name(self):
        return self._name

    @property
    def path_log_storage(self):
        return self._path_log_storage

    def clear(self):
        """
        Does NOT clear crayon's internal experiment logs and files.
        """
        self._experiments = {}

    def export_all(self, iter_nr):
        """
        Exports all logs of the current run in Tensorboard's format and as
        json files.
        """
        if self._path_log_storage is not None:
            path_crayon = ospj(self._path_log_storage, str(self._name),
                               str(iter_nr), "crayon")
            path_json = ospj(self._path_log_storage, str(self._name),
                             str(iter_nr), "as_json")
            create_dir_if_not_exist(path=path_crayon)
            create_dir_if_not_exist(path=path_json)
            for e in self._experiments.values():
                e.to_zip(filename=ospj(path_crayon, e.xp_name + ".zip"))
            write_dict_to_file_json(dictionary=self._custom_logs,
                                    _dir=path_json, file_name="logs")

    def update_from_log_buffer(self):
        """
        Pulls newly added logs from the chief onto whatever worker
        CrayonWrapper runs on. It then adds all these new logs to Tensorboard
        (i.e. PyCrayon's docker container).
        """
        new_v, exp_names = self._get_new_vals()

        for e in exp_names:
            if e not in self._experiments.keys():
                self._custom_logs[e] = {}
                try:
                    self._experiments[e] = self._crayon.create_experiment(
                        xp_name=e)
                except ValueError:
                    self._crayon.remove_experiment(xp_name=e)
                    self._experiments[e] = self._crayon.create_experiment(
                        xp_name=e)

        for name, vals_dict in new_v.items():
            for graph_name, data_points in vals_dict.items():
                for data_point in data_points:
                    step = int(data_point[0])
                    val = data_point[1]
                    self._experiments[name].add_scalar_value(name=graph_name,
                                                             step=step,
                                                             value=val)
                    if graph_name not in self._custom_logs[name].keys():
                        self._custom_logs[name][graph_name] = []
                    self._custom_logs[name][graph_name].append({step: val})

    def _get_new_vals(self):
        """
        Returns:
            dict: Pulls and returns newly added logs from the chief onto
            whatever worker CrayonWrapper runs on.
        """
        return self._ray.get(
            self._ray.remote(self._chief_handle.get_new_values))
    return y


if __name__ == '__main__':
    # GPU flag
    gpu_fg = util.gpuCheck(sys.argv)
    if gpu_fg >= 0:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu_fg >= 0 else np

    # initialize pycrayon
    cc = CrayonClient(hostname="192.168.1.198", port=8889)
    # delete these experiments from the server if they already exist
    try:
        cc.remove_experiment("AlexNet train (Adam)")
        cc.remove_experiment("AlexNet test (Adam)")
    except ValueError:
        pass
    # create new experiments (or reopen them if they still exist)
    try:
        tb_alex_train = cc.create_experiment("AlexNet train (Adam)")
        tb_alex_test = cc.create_experiment("AlexNet test (Adam)")
    except ValueError:
        tb_alex_train = cc.open_experiment("AlexNet train (Adam)")
        tb_alex_test = cc.open_experiment("AlexNet test (Adam)")

    # x_train: 32*32*3
    train, test = get_cifar10()
    x_train, t_train = train._datasets
torch.cuda.manual_seed(opt.seed)

if len(opt.gpuid) > 1:
    sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n")
    sys.exit(1)

# Set up the Crayon logging server.
if opt.exp_host != "":
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=opt.exp_host)
    experiments = cc.get_experiment_names()
    print(experiments)
    if opt.exp in experiments:
        cc.remove_experiment(opt.exp)
    experiment = cc.create_experiment(opt.exp)

if opt.tensorboard:
    from tensorboardX import SummaryWriter
    writer = SummaryWriter(
        opt.tensorboard_log_dir + datetime.now().strftime("/%b-%d_%H-%M-%S"),
        comment="Onmt")

progress_step = 0


def report_func(epoch, batch, num_batches, progress_step, start_time, lr,
                report_stats):
    """
    lr=lr)

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

# tensorboard
use_tensorboard = use_tensorboard and CrayonClient is not None
if use_tensorboard:
    cc = CrayonClient(hostname='127.0.0.1')
    if remove_all_log:
        cc.remove_all_experiments()
    if exp_name is None:
        exp_name = datetime.now().strftime('vgg16_%m-%d_%H-%M')
        exp_name = save_exp_name
        if exp_name in cc.get_experiment_names():
            cc.remove_experiment(exp_name)
        exp = cc.create_experiment(exp_name)
    else:
        exp = cc.open_experiment(exp_name)

# training
train_loss = 0
step_cnt = 0
re_cnt = False
t = Timer()
t.tic()

best_mae = sys.maxsize
for epoch in range(start_step, end_step + 1):
    step = -1
if __name__ == '__main__':
    # GPU flag
    gpu_fg = util.gpuCheck(sys.argv)
    if gpu_fg >= 0:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu_fg >= 0 else np

    # initialize pycrayon
    cc = CrayonClient(hostname="192.168.1.201", port=8889)

    # delete these experiments from the server if they already exist
    try:
        cc.remove_experiment("MNIST_DCGAN_GEN")
        cc.remove_experiment("MNIST_DCGAN_DIS")
    except ValueError:
        pass
    # create new experiments (or reopen them if they still exist)
    try:
        tb_gen = cc.create_experiment("MNIST_DCGAN_GEN")
        tb_dis = cc.create_experiment("MNIST_DCGAN_DIS")
    except ValueError:
        tb_gen = cc.open_experiment("MNIST_DCGAN_GEN")
        tb_dis = cc.open_experiment("MNIST_DCGAN_DIS")

    # Training Data
    train, test = chainer.datasets.get_mnist()
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from pycrayon import CrayonClient
import time

cc = CrayonClient(hostname="10.150.6.120")
try:
    cc.remove_experiment("AnalyzeConv3")
except ValueError:
    pass
try:
    OMIE = cc.create_experiment("AnalyzeConv3")
except ValueError:
    pass

##
## noise level one
## dimension 2
### z is sampled separately
###
input_size = 64
hidden_size = 128
def main():
    config = DefaultConfigs()
    train_input_root = os.path.join(config.data)
    train_labels_file = 'labels.csv'

    if config.output:
        if not os.path.exists(config.output):
            os.makedirs(config.output)
        output_base = config.output
    else:
        if not os.path.exists(config.output):
            os.makedirs(config.output)
        output_base = config.output

    exp_name = '-'.join([
        datetime.now().strftime("%Y%m%d-%H%M%S"),
        config.model,
        str(config.img_size),
        'f' + str(config.fold)
    ])
    mask_exp_name = '-'.join(
        [config.model, str(config.img_size), 'f' + str(config.fold)])
    mask_exp_name = glob.glob(
        os.path.join(output_base, 'train', '*' + mask_exp_name))
    if config.resume and mask_exp_name:
        output_dir = mask_exp_name
    else:
        output_dir = get_outdir(output_base, 'train', exp_name)

    batch_size = config.batch_size
    test_batch_size = config.test_batch_size
    num_epochs = config.epochs
    img_type = config.image_type
    img_size = (config.img_size, config.img_size)
    num_classes = get_tags_size(config.labels)

    torch.manual_seed(config.seed)

    dataset_train = HumanDataset(
        train_input_root,
        train_labels_file,
        train=True,
        multi_label=config.multi_label,
        img_type=img_type,
        img_size=img_size,
        fold=config.fold,
    )
    # sampler = WeightedRandomOverSampler(dataset_train.get_sample_weights())
    loader_train = data.DataLoader(
        dataset_train,
        batch_size=batch_size,
        shuffle=True,
        # sampler=sampler,
        num_workers=config.num_processes)

    dataset_eval = HumanDataset(
        train_input_root,
        train_labels_file,
        train=False,
        multi_label=config.multi_label,
        img_type=img_type,
        img_size=img_size,
        test_aug=config.tta,
        fold=config.fold,
    )
    loader_eval = data.DataLoader(dataset_eval,
                                  batch_size=test_batch_size,
                                  shuffle=False,
                                  num_workers=config.num_processes)

    # model = model_factory.create_model(
    #     config.model,
    #     pretrained=True,
    #     num_classes=num_classes,
    #     drop_rate=config.drop,
    #     global_pool=config.gp)
    model = get_net(config.model, num_classes, config.drop, config.channels)

    if not config.no_cuda:
        if config.num_gpu > 1:
            model = torch.nn.DataParallel(
                model, device_ids=list(range(config.num_gpu))).cuda()
        else:
            model.cuda()

    if config.opt.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.lr,
                              momentum=config.momentum,
                              weight_decay=config.weight_decay)
    elif config.opt.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.lr,
                               weight_decay=config.weight_decay)
    elif config.opt.lower() == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=config.lr,
                                   weight_decay=config.weight_decay)
    elif config.opt.lower() == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.lr,
                                  alpha=0.9,
                                  momentum=config.momentum,
                                  weight_decay=config.weight_decay)
    elif config.opt.lower() == 'yellowfin':
        optimizer = YFOptimizer(model.parameters(),
                                lr=config.lr,
                                weight_decay=config.weight_decay,
                                clip_thresh=2)
    else:
        assert False, "Invalid optimizer"

    if not config.decay_epochs:
        lr_scheduler = ReduceLROnPlateau(optimizer, patience=8)
    else:
        lr_scheduler = None

    if config.class_weights:
        class_weights = torch.from_numpy(
            dataset_train.get_class_weights()).float()
        class_weights_norm = class_weights / class_weights.sum()
        if not config.no_cuda:
            class_weights = class_weights.cuda()
            class_weights_norm = class_weights_norm.cuda()
    else:
        class_weights = None
        class_weights_norm = None

    if config.loss.lower() == 'nll':
        # assert not args.multi_label and 'Cannot use crossentropy with multi-label target.'
        loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)
    elif config.loss.lower() == 'mlsm':
        assert config.multi_label
        loss_fn = torch.nn.MultiLabelSoftMarginLoss(weight=class_weights)
    else:
        assert False, "Invalid loss function"

    if not config.no_cuda:
        loss_fn = loss_fn.cuda()

    # optionally resume from a checkpoint
    start_epoch = 1
    if config.resume:
        if os.path.isfile(config.resume):
            print("=> loading checkpoint '{}'".format(config.resume))
            checkpoint = torch.load(config.resume)
            config.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                config.resume, checkpoint['epoch']))
            start_epoch = checkpoint['epoch']
        else:
            print("=> no checkpoint found at '{}'".format(config.resume))
            exit(-1)

    use_tensorboard = not config.no_tb and CrayonClient is not None
    if use_tensorboard:
        hostname = '127.0.0.1'
        port = 8889
        host_port = config.tbh.split(':')[:2]
        if len(host_port) == 1:
            hostname = host_port[0]
        elif len(host_port) >= 2:
            hostname, port = host_port[:2]
        try:
            cc = CrayonClient(hostname=hostname, port=port)
            try:
                cc.remove_experiment(exp_name)
            except ValueError:
                pass
            exp = cc.create_experiment(exp_name)
        except Exception as e:
            exp = None
            print("Error (%s) connecting to Tensorboard/Crayon server. "
                  "Giving up..." % str(e))
    else:
        exp = None

    # Optional fine-tune of only the final classifier weights for specified
    # number of epochs (or part of)
    if not config.resume and config.ft_epochs > 0.:
        if config.opt.lower() == 'adam':
            finetune_optimizer = optim.Adam(model.get_fc().parameters(),
                                            lr=config.ft_lr,
                                            weight_decay=config.weight_decay)
        else:
            finetune_optimizer = optim.SGD(model.get_fc().parameters(),
                                           lr=config.ft_lr,
                                           momentum=config.momentum,
                                           weight_decay=config.weight_decay)
        finetune_epochs_int = int(np.ceil(config.ft_epochs))
        finetune_final_batches = int(
            np.ceil((1 - (finetune_epochs_int - config.ft_epochs)) *
                    len(loader_train)))
        print(finetune_epochs_int, finetune_final_batches)
        for fepoch in range(1, finetune_epochs_int + 1):
            if fepoch == finetune_epochs_int and finetune_final_batches:
                batch_limit = finetune_final_batches
            else:
                batch_limit = 0
            train_epoch(fepoch, model, loader_train, finetune_optimizer,
                        loss_fn, config, class_weights_norm, output_dir,
                        batch_limit=batch_limit)
            step = fepoch * len(loader_train)
            score, _ = validate(step, model, loader_eval, loss_fn, config,
                                0.3, output_dir)

    score_metric = 'f2'
    best_loss = None
    best_f2 = None
    threshold = 0.2
    try:
        for epoch in range(start_epoch, num_epochs + 1):
            if config.decay_epochs:
                adjust_learning_rate(optimizer, epoch,
                                     initial_lr=config.lr,
                                     decay_epochs=config.decay_epochs)
            train_metrics = train_epoch(epoch, model, loader_train, optimizer,
                                        loss_fn, config, class_weights_norm,
                                        output_dir, exp=exp)
            step = epoch * len(loader_train)
            eval_metrics, latest_threshold = validate(step, model, loader_eval,
                                                      loss_fn, config,
                                                      threshold, output_dir,
                                                      exp=exp)
            if lr_scheduler is not None:
                lr_scheduler.step(eval_metrics['eval_loss'])
            rowd = OrderedDict(epoch=epoch)
            rowd.update(train_metrics)
            rowd.update(eval_metrics)
            with open(os.path.join(output_dir, 'summary.csv'), mode='a') as cf:
                dw = csv.DictWriter(cf, fieldnames=rowd.keys())
                if best_loss is None:  # first iteration (epoch == 1 can't be used)
                    dw.writeheader()
                dw.writerow(rowd)
            best = False
            if best_loss is None or eval_metrics['eval_loss'] < best_loss[1]:
                best_loss = (epoch, eval_metrics['eval_loss'])
                if score_metric == 'loss':
                    best = True
            if best_f2 is None or eval_metrics['eval_f2'] > best_f2[1]:
                best_f2 = (epoch, eval_metrics['eval_f2'])
                if score_metric == 'f2':
                    best = True
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': config.model,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'threshold': latest_threshold,
                    'config': config
                },
                is_best=best,
                filename=os.path.join(config.checkpoint_path,
                                      'checkpoint-%d.pth.tar' % epoch),
                output_dir=output_dir)
    except KeyboardInterrupt:
        pass
    print('*** Best loss: {0} (epoch {1})'.format(best_loss[1], best_loss[0]))
    print('*** Best f2: {0} (epoch {1})'.format(best_f2[1], best_f2[0]))
from torch.utils import data as D
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Cutout, Transpose, ShiftScaleRotate, Blur, OpticalDistortion,
    GridDistortion, HueSaturationValue, IAAAdditiveGaussianNoise, GaussNoise,
    MotionBlur, MedianBlur, IAAPiecewiseAffine, IAASharpen, IAAEmboss,
    RandomContrast, RandomBrightness, Flip, OneOf, Compose, ToGray)
from patchwise import Quadrant

## If you want to keep a track of your network on tensorboard, set USE_TENSORBOARD TO 1 in config file.
if USE_TENSORBOARD:
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    try:
        cc.remove_experiment(EXP_NAME)
    except ValueError:
        # experiment does not exist yet
        pass
    foo = cc.create_experiment(EXP_NAME)

## If you want to use the GPU, set GPU_MODE TO 1 in config file
use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

count = 0


def strong_aug(p=.5):
    return Compose([
def eval_loop(counter, args, shared_model, model_eval):
    try:
        SEC_PER_DAY = 24 * 60 * 60
        env = build_env(args.type, args,
                        treat_life_lost_as_terminal=False,
                        max_time=5 * 60)
        model = copy.deepcopy(shared_model)
        model.eval()

        # Create a new experiment
        vis = visdom.Visdom(env='A3C:' + args.name)
        cc = CrayonClient()
        names = cc.get_experiment_names()
        summaries = []
        for idx in range(args.n_eval):
            name = "{} [{}]".format(args.name, idx + 1)
            if name in names:
                cc.remove_experiment(name)
            summaries.append(cc.create_experiment(name))

        max_reward = None
        save_condition = args.save_intervel
        rewards = []
        start_time = time.time()

        while True:
            # Sync with the shared model
            model.load_state_dict(shared_model.state_dict())
            restart = False
            eval_start_time, eval_start_step = time.time(), counter.value
            results = []
            for i in range(args.n_eval):
                model.reset_state()
                results.append(model_eval(model, env, vis=(vis, i + 1, 60)))
                if env.exceed_max:
                    restart = True
                    env.reset()
                    break
                env.reset()
            if restart:
                continue

            eval_end_time, eval_end_step = time.time(), counter.value
            results = EvalResult(*zip(*results))
            rewards.append((counter.value, results.reward))

            local_max_reward = np.max(results.reward)
            if max_reward is None or max_reward < local_max_reward:
                max_reward = local_max_reward
            if local_max_reward >= max_reward:
                # Save model
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'best_model.pth'))

            time_since_start = eval_end_time - start_time
            day = time_since_start // SEC_PER_DAY
            time_since_start %= SEC_PER_DAY
            seconds_to_finish = (args.n_steps - eval_end_step) / (
                eval_end_step - eval_start_step) * (eval_end_time -
                                                    eval_start_time)
            days_to_finish = seconds_to_finish // SEC_PER_DAY
            seconds_to_finish %= SEC_PER_DAY
            print("STEP:[{}|{}], Time: {}d {}, Finish in {}d {}".format(
                counter.value, args.n_steps, '%02d' % day,
                time.strftime("%Hh %Mm %Ss", time.gmtime(time_since_start)),
                '%02d' % days_to_finish,
                time.strftime("%Hh %Mm %Ss", time.gmtime(seconds_to_finish))))
            print('\tMax reward: {}, avg_reward: {}, std_reward: {}, '
                  'min_reward: {}, max_reward: {}'.format(
                      max_reward, np.mean(results.reward),
                      np.std(results.reward), np.min(results.reward),
                      local_max_reward))

            # Plot
            for summary, reward in zip(summaries, results.reward):
                summary.add_scalar_value('reward', reward,
                                         step=eval_start_step)

            if counter.value > save_condition or counter.value >= args.n_steps:
                save_condition += args.save_intervel
                torch.save(
                    model.state_dict(),
                    os.path.join(args.model_path,
                                 'model_iter_{}.pth'.format(counter.value)))
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'model_latest.pth'))
                with open(os.path.join(args.save_path, 'rewards'), 'a+') as f:
                    for record in rewards:
                        f.write('{}: {}\n'.format(record[0], record[1]))
                    del rewards[:]

            if counter.value >= args.n_steps:
                print('Evaluator Finished !!!')
                break
    except KeyboardInterrupt:
        torch.save(shared_model.state_dict(),
                   os.path.join(args.model_path, 'model_latest.pth'))
        raise
def main(args):
    # setup tensorboard
    if args.tensorboard:
        cc = CrayonClient(hostname="localhost")
        print(cc.get_experiment_names())
        # if args.name in cc.get_experiment_names():
        try:
            cc.remove_experiment(args.name)
        except ValueError:
            print("experiment didn't exist")
        cc_server = cc.create_experiment(args.name)

    # Create model directory
    full_model_path = args.model_path + "/" + args.name
    if not os.path.exists(full_model_path):
        os.makedirs(full_model_path)
    with open(full_model_path + "/parameters.json", 'w') as f:
        f.write(json.dumps(vars(args)))

    # Image preprocessing
    transform = transforms.Compose([
        transforms.Scale(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    mini_transform = transforms.Compose(
        [transforms.ToPILImage(),
         transforms.Scale(20),
         transforms.ToTensor()])

    # Load vocabulary wrapper.
    if args.vocab_path is not None:
        with open(args.vocab_path, 'rb') as f:
            vocab = pickle.load(f)
    else:
        print("building new vocab")
        vocab = build_vocab(args.image_dir, 1, None)
        with open((full_model_path + "/vocab.pkl"), 'wb') as f:
            pickle.dump(vocab, f)

    # Build data loader
    data_loader = get_loader(args.image_dir, vocab, transform,
                             args.batch_size, shuffle=True,
                             num_workers=args.num_workers)
    code_data_set = ProcessingDataset(root=args.image_dir, vocab=vocab,
                                      transform=transform)
    train_ds, val_ds = validation_split(code_data_set)
    train_loader = torch.utils.data.DataLoader(train_ds, collate_fn=collate_fn)
    test_loader = torch.utils.data.DataLoader(val_ds, collate_fn=collate_fn)
    train_size = len(train_loader)
    test_size = len(test_loader)

    # Build the models
    encoder = EncoderCNN(args.embed_size, args.train_cnn)
    print(encoder)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)
    print(decoder)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    # params = list(decoder.parameters())
    # + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    start_time = time.time()
    add_log_entry(args.name, start_time, vars(args))

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            decoder.train()
            encoder.train()
            # Set mini-batch dataset
            image_ts = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]
            count = images.size()[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(image_ts)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            total = targets.size(0)
            max_index = outputs.max(dim=1)[1]
            # correct = (max_index == targets).sum()
            _, predicted = torch.max(outputs.data, 1)
            correct = predicted.eq(targets.data).cpu().sum()
            accuracy = 100. * correct / total

            if args.tensorboard:
                cc_server.add_scalar_value("train_loss", loss.data[0])
                cc_server.add_scalar_value("perplexity", np.exp(loss.data[0]))
                cc_server.add_scalar_value("accuracy", accuracy)

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, '
                      'accuracy: %2.2f Perplexity: %5.4f' %
                      (epoch, args.num_epochs, i, total_step, loss.data[0],
                       accuracy, np.exp(loss.data[0])))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(full_model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(full_model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))

            if 1 == 2 and i % int(train_size / 10) == 0:
                encoder.eval()
                # decoder.eval()
                correct = 0
                for ti, (timages, tcaptions, tlengths) in enumerate(test_loader):
                    timage_ts = to_var(timages, volatile=True)
                    tcaptions = to_var(tcaptions)
                    ttargets = pack_padded_sequence(tcaptions, tlengths,
                                                    batch_first=True)[0]
                    tfeatures = encoder(timage_ts)
                    toutputs = decoder(tfeatures, tcaptions, tlengths)
                    print(ttargets)
                    print(toutputs)
                    print(ttargets.size())
                    print(toutputs.size())
                    # correct = (ttargets.eq(toutputs[0].long())).sum()
                accuracy = 100 * correct / test_size
                print('accuracy: %.4f' % (accuracy))
                if args.tensorboard:
                    cc_server.add_scalar_value("accuracy", accuracy)

    torch.save(
        decoder.state_dict(),
        os.path.join(full_model_path,
                     'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
    torch.save(
        encoder.state_dict(),
        os.path.join(full_model_path,
                     'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))

    end_time = time.time()
    print("finished training, runtime: %d", [(end_time - start_time)])