class VisdomMonitor(Wrapper):
    # Gym-style environment wrapper that accumulates per-episode rewards and,
    # every `report_freq` steps, pushes the mean reward of the episodes
    # completed since the last report to a Visdom line plot.
    # NOTE(review): relies on the legacy gym underscore hook API
    # (`_step`/`_reset`) — confirm the installed gym version still calls these.

    def __init__(self, env, cmdl):
        # `cmdl` is a parsed command-line/config object; the fields read here
        # are report_freq, display_plots and label.
        super(VisdomMonitor, self).__init__(env)

        self.freq = cmdl.report_freq  # in steps
        self.cmdl = cmdl

        if self.cmdl.display_plots:
            # One window is created up front; later reports append to it.
            self.vis = Visdom()
            self.plot = self.vis.line(
                Y=np.array([0]), X=np.array([0]),
                opts=dict(
                    title=cmdl.label,
                    caption="Episodic reward per 1200 steps.")
            )

        self.step_cnt = 0          # total environment steps seen
        self.ep_cnt = -1           # index of the current episode (bumped on reset)
        self.ep_rw = []            # cumulative reward per episode
        self.last_reported_ep = 0  # first episode not yet folded into a report

    def _step(self, action):
        # self._before_step(action)
        observation, reward, done, info = self.env.step(action)
        done = self._after_step(observation, reward, done, info)
        return observation, reward, done, info

    def _reset(self):
        # _before_reset must run first: it appends the reward slot that
        # _after_step indexes via self.ep_cnt (incremented in _after_reset).
        self._before_reset()
        observation = self.env.reset()
        self._after_reset(observation)
        return observation

    def _after_step(self, o, r, done, info):
        # Accumulate reward into the current episode and report periodically.
        # NOTE(review): assumes reset() is called before the first step —
        # otherwise self.ep_rw is empty and indexing raises. Confirm callers.
        self.ep_rw[self.ep_cnt] += r
        self.step_cnt += 1
        if self.step_cnt % self.freq == 0:
            self._update_plot()
        return done

    def _before_reset(self):
        # Open a fresh reward accumulator for the upcoming episode.
        self.ep_rw.append(0)

    def _after_reset(self, observation):
        self.ep_cnt += 1
        # print("[%2d][%4d] RESET" % (self.ep_cnt, self.step_cnt))

    def _update_plot(self):
        # Mean reward over the episodes finished (or in progress) since the
        # last report; the in-progress episode is included.
        # print(self.last_reported_ep, self.ep_cnt + 1)
        completed_eps = self.ep_rw[self.last_reported_ep:self.ep_cnt + 1]
        ep_mean_reward = sum(completed_eps) / len(completed_eps)
        if self.cmdl.display_plots:
            self.vis.line(
                X=np.array([self.step_cnt]),
                Y=np.array([ep_mean_reward]),
                win=self.plot,
                update='append'
            )
        self.last_reported_ep = self.ep_cnt + 1
def main():
    """Train a (CNN-)RNN CTC acoustic model driven entirely by an INI config.

    Reads model/data/training hyper-parameters from ``args.conf``, builds the
    model and data loaders, runs the training loop with dev-loss-driven
    learning-rate decay, plots curves to Visdom, and saves the best model.
    """
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except:
        # NOTE(review): bare except — a missing file does not actually raise
        # here in configparser; consider checking the read() return value.
        print("conf file not exists")
        sys.exit(1)
    try:
        seed = cf.get('Training', 'seed')
        seed = long(seed)  # NOTE(review): `long` is Python 2 only
    except:
        # No (valid) seed in the config: draw one from CUDA and persist it
        # back into the config file so the run is reproducible.
        seed = torch.cuda.initial_seed()
        cf.set('Training', 'seed', seed)
        cf.write(open(args.conf, 'w'))

    USE_CUDA = cf.getboolean("Training", "use_cuda")
    torch.manual_seed(seed)
    if USE_CUDA:
        torch.cuda.manual_seed(seed)

    logger = init_logger(os.path.join(args.log_dir, 'train_ctc_model.log'))

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = supported_rnn[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    rnn_param = {"rnn_input_size":rnn_input_size, "rnn_hidden_size":rnn_hidden_size, "rnn_layers":rnn_layers, "rnn_type":rnn_type, "bidirectional":bidirectional, "batch_norm":batch_norm}

    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    add_cnn = cf.getboolean('Model', 'add_cnn')

    cnn_param = {}
    layers = cf.getint('CNN', 'layers')
    # NOTE(review): eval() on config strings parses list literals but executes
    # arbitrary code — only safe for trusted config files (ast.literal_eval
    # would be the safe equivalent).
    channel = eval(cf.get('CNN', 'channel'))
    kernel_size = eval(cf.get('CNN', 'kernel_size'))
    stride = eval(cf.get('CNN', 'stride'))
    padding = eval(cf.get('CNN', 'padding'))
    pooling = eval(cf.get('CNN', 'pooling'))
    batch_norm = cf.getboolean('CNN', 'batch_norm')
    activation_function = supported_activate[cf.get('CNN', 'activation_function')]
    cnn_param['batch_norm'] = batch_norm
    cnn_param['activate_function'] = activation_function
    cnn_param["layer"] = []
    # One [channel, kernel, stride, padding, pooling] spec per conv layer.
    for layer in range(layers):
        layer_param = [channel[layer], kernel_size[layer], stride[layer], padding[layer]]
        if pooling is not None:
            layer_param.append(pooling[layer])
        else:
            layer_param.append(None)
        cnn_param["layer"].append(layer_param)

    model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param, num_class=num_class, drop_out=drop_out)

    for idx, m in enumerate(model.children()):
        print(idx, m)
        logger.info(str(idx) + "->" + str(m))

    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    mel = cf.getboolean('Data', 'mel')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = SpeechDataset(data_dir, data_set='train', feature_type=feature_type, out_type=out_type, n_feats=n_feats, mel=mel)
    dev_dataset = SpeechDataset(data_dir, data_set="dev", feature_type=feature_type, out_type=out_type, n_feats=n_feats, mel=mel)
    # The CNN front-end needs a loader with different padding/collation.
    if add_cnn:
        train_loader = SpeechCNNDataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=False)
        dev_loader = SpeechCNNDataLoader(dev_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=False)
    else:
        train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=False)
        dev_loader = SpeechDataLoader(dev_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=False)

    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2class, space_idx=-1, blank_index=0)

    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    params = { 'num_epoches':num_epoches, 'end_adjust_acc':end_adjust_acc, 'mel': mel, 'seed':seed,
               'decay':decay, 'learning_rate':init_lr, 'weight_decay':weight_decay, 'batch_size':batch_size,
               'feature_type':feature_type, 'n_feats': n_feats, 'out_type': out_type }
    print(params)

    if USE_CUDA:
        model = model.cuda()

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom()
    if add_cnn:
        title = dataset+' '+feature_type+str(n_feats)+' CNN_LSTM_CTC'
    else:
        title = dataset+' '+feature_type+str(n_feats)+' LSTM_CTC'

    # Three Visdom panes: train loss, dev loss, dev CER — created lazily on
    # the first epoch and replaced (full redraw) on subsequent epochs.
    opts = [dict(title=title+" Loss", ylabel = 'Loss', xlabel = 'Epoch'),
            dict(title=title+" Loss on Dev", ylabel = 'DEV Loss', xlabel = 'Epoch'),
            dict(title=title+' CER on DEV', ylabel = 'DEV CER', xlabel = 'Epoch')]
    viz_window = [None, None, None]

    count = 0                    # epochs completed
    learning_rate = init_lr
    loss_best = 1000             # best dev loss used for LR-adjust decisions
    loss_best_true = 1000        # best dev loss ever seen
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0              # number of LR decays performed so far
    acc_best = 0
    start_time = time.time()
    loss_results = []
    dev_loss_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1

        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay

        print("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))

        loss = train(model, train_loader, loss_fn, optimizer, logger, add_cnn=add_cnn, print_every=20, USE_CUDA=USE_CUDA)
        loss_results.append(loss)
        acc, dev_loss = dev(model, dev_loader, loss_fn, decoder, logger, add_cnn=add_cnn, USE_CUDA=USE_CUDA)
        print("loss on dev set is %.4f" % dev_loss)
        logger.info("loss on dev set is %.4f" % dev_loss)
        dev_loss_results.append(dev_loss)
        dev_cer_results.append(acc)

        #adjust learning rate by dev_loss
        # Clear improvement (> end_adjust_acc margin): reset the patience
        # counter and checkpoint model/optimizer state.
        if dev_loss < (loss_best - end_adjust_acc):
            loss_best = dev_loss
            loss_best_true = dev_loss
            #acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        # Within the tolerance band: count towards a decay, but still track a
        # strictly-better loss if one occurs.
        elif (dev_loss < loss_best + end_adjust_acc):
            adjust_rate_count += 1
            if dev_loss < loss_best and dev_loss < loss_best_true:
                loss_best_true = dev_loss
                #acc_best = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        # Regression: force an immediate decay (count jumps to threshold).
        else:
            adjust_rate_count = 10

        if acc > acc_best:
            acc_best = acc
            best_model_state = copy.deepcopy(model.state_dict())
            best_op_state = copy.deepcopy(optimizer.state_dict())

        print("adjust_rate_count:"+str(adjust_rate_count))
        print('adjust_time:'+str(adjust_time))
        logger.info("adjust_rate_count:"+str(adjust_rate_count))
        logger.info('adjust_time:'+str(adjust_time))

        # Patience exhausted: schedule an LR decay for the next epoch and roll
        # back to the last checkpointed state.
        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            if loss_best > loss_best_true:
                loss_best = loss_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)

        # Stop after the 8th decay.
        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))

        x_axis = range(count)
        y_axis = [loss_results[0:count], dev_loss_results[0:count], dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), opts = opts[x],)
            else:
                viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), win = viz_window[x], update = 'replace',)

    print("End training, best dev loss is: %.4f, acc is: %.4f" % (loss_best, acc_best))
    logger.info("End training, best dev loss acc is: %.4f, acc is: %.4f" % (loss_best, acc_best))
    # Restore the highest-accuracy snapshot before saving.
    # NOTE(review): best_model_state is only bound once acc > 0 has occurred —
    # a run with no positive accuracy would raise NameError here; confirm.
    model.load_state_dict(best_model_state)
    optimizer.load_state_dict(best_op_state)
    best_path = os.path.join(args.log_dir, 'best_model'+'_dev'+str(acc_best)+'.pkl')
    cf.set('Model', 'model_file', best_path)
    cf.write(open(args.conf, 'w'))
    params['epoch']=count

    torch.save(CTC_Model.save_package(model, optimizer=optimizer, epoch=params, loss_results=loss_results, dev_loss_results=dev_loss_results, dev_cer_results=dev_cer_results), best_path)
class VisdomLogger(Logger):
    """
    Logger that uses visdom to create learning curves

    Parameters:
    ===========
    - env: str, name of the visdom environment
    - log_checkpoints: bool, whether to use checkpoints or epoch
        averages for training loss
    - losses: tuple, names of the different losses that will be plotted.
    - phases: tuple, phases under which losses are reported.
    - server, port: visdom server location.
    - max_y: float or None, optional ceiling used to clip plotted values.
    """
    def __init__(self, env=None, log_checkpoints=True, losses=('loss', ),
                 phases=('train', 'valid'), server='http://localhost',
                 port=8097, max_y=None, **opts):
        # self.viz stays None when visdom isn't importable; the
        # skip_on_import_error decorator below turns plotting into a no-op.
        self.viz = None
        if Visdom is not None:
            self.viz = Visdom(server=server, port=port, env=env)
        # One named trace per (phase, loss) pair, e.g. "train.loss".
        self.legend = ['%s.%s' % (p, l) for p in phases for l in losses]
        opts.update({'legend': self.legend})
        self.opts = opts
        self.env = env
        self.max_y = max_y
        self.log_checkpoints = log_checkpoints
        self.losses = set(losses)
        # Last plotted (X, Y) point per trace, so each update draws a segment.
        self.last = {p: {l: None for l in losses} for p in phases}
        self.pane = self._init_pane()

    @skip_on_import_error(Visdom)
    def _init_pane(self):
        # Seed the pane with NaNs so all traces exist before real data arrives.
        nan = np.array([np.NAN, np.NAN])
        X = np.column_stack([nan] * len(self.legend))
        Y = np.column_stack([nan] * len(self.legend))
        return self.viz.line(
            X=X, Y=Y, env=self.env, opts=self.opts)

    def _update_last(self, epoch, loss, phase, loss_label):
        # Remember the newest point of this trace for the next segment.
        self.last[phase][loss_label] = {'X': epoch, 'Y': loss}

    def _plot_line(self, X, Y, phase, loss_label):
        # Append the segment (last point -> new point) to the named trace.
        name = "%s.%s" % (phase, loss_label)
        X = np.array([self.last[phase][loss_label]['X'], X])
        Y = np.array([self.last[phase][loss_label]['Y'], Y])
        if self.max_y:
            Y = np.clip(Y, Y.min(), self.max_y)
        self.viz.updateTrace(
            X=X, Y=Y, name=name, append=True, win=self.pane, env=self.env)

    def _plot_payload(self, epoch, losses, phase):
        # Plot every reported loss we were configured to track; the very
        # first report only primes `last` (no segment to draw yet).
        for label, loss in losses.items():
            if label not in self.losses:
                continue
            if self.last[phase][label] is not None:
                self._plot_line(epoch, loss, phase=phase, loss_label=label)
            self._update_last(epoch, loss, phase, label)

    @skip_on_import_error(Visdom)
    def epoch_end(self, payload):
        if self.log_checkpoints:
            # only use epoch end if checkpoint isn't being used
            return
        losses, epoch = payload['loss'], payload['epoch'] + 1
        self._plot_payload(epoch, losses, 'train')

    @skip_on_import_error(Visdom)
    def validation_end(self, payload):
        losses, epoch = payload['loss'], payload['epoch'] + 1
        self._plot_payload(epoch, losses, 'valid')

    @skip_on_import_error(Visdom)
    def checkpoint(self, payload):
        # FIX: mirror epoch_end's guard (and the sibling VisdomLogger in this
        # file): without it, disabling log_checkpoints made BOTH epoch_end and
        # checkpoint plot the train curve, double-logging every epoch.
        if not self.log_checkpoints:
            return
        # Fractional epoch position of this checkpoint.
        epoch = payload['epoch'] + payload["batch"] / payload["total_batches"]
        losses = payload['loss']
        self._plot_payload(epoch, losses, 'train')

    @skip_on_import_error(Visdom)
    def attention(self, payload):
        title = "epoch {epoch}/ batch {batch_num}".format(**payload)
        if 'title' in self.opts:
            title = self.opts['title'] + ": " + title
        self.viz.heatmap(
            X=np.array(payload["att"]),
            env=self.env,
            opts={'rownames': payload["hyp"],
                  'columnnames': payload["target"],
                  'title': title})
class Callback(object):
    """A class representing routines called reactively at specific phases during training.

    These can be used to log or visualize the training progress using any of the metric scores developed before.
    The values are stored at the end of each training epoch. The following metric scores are currently available:

    * :class:`~gensim.models.callbacks.CoherenceMetric`
    * :class:`~gensim.models.callbacks.PerplexityMetric`
    * :class:`~gensim.models.callbacks.DiffMetric`
    * :class:`~gensim.models.callbacks.ConvergenceMetric`

    """
    def __init__(self, metrics):
        """
        Parameters
        ----------
        metrics : list of :class:`~gensim.models.callbacks.Metric`
            The list of metrics to be reported by the callback.

        """
        self.metrics = metrics

    def set_model(self, model):
        """Save the model instance and initialize any required variables which would be updated throughout training.

        Parameters
        ----------
        model : :class:`~gensim.models.basemodel.BaseTopicModel`
            The model for which the training will be reported (logged or visualized) by the callback.

        """
        self.model = model
        self.previous = None
        # check for any metric which need model state from previous epoch
        if any(isinstance(metric, (DiffMetric, ConvergenceMetric)) for metric in self.metrics):
            self.previous = copy.deepcopy(model)
            # store diff diagonals of previous epochs
            self.diff_mat = Queue()
        if any(metric.logger == "visdom" for metric in self.metrics):
            if not VISDOM_INSTALLED:
                raise ImportError("Please install Visdom for visualization")
            self.viz = Visdom()
            # store initial plot windows of every metric (same window will be updated with increasing epochs)
            self.windows = []
        if any(metric.logger == "shell" for metric in self.metrics):
            # set logger for current topic model
            self.log_type = logging.getLogger('gensim.models.ldamodel')

    def on_epoch_end(self, epoch, topics=None):
        """Report the current epoch's metric value.

        Called at the end of each training iteration.

        Parameters
        ----------
        epoch : int
            The epoch that just ended.
        topics : list of list of str, optional
            List of tokenized topics. This is required for the coherence metric.

        Returns
        -------
        dict of (str, object)
            Mapping from metric names to their values. The type of each value depends on the metric type,
            for example :class:`~gensim.models.callbacks.DiffMetric` computes a matrix while
            :class:`~gensim.models.callbacks.ConvergenceMetric` computes a float.

        """
        # stores current epoch's metric values
        current_metrics = {}
        # plot all metrics in current epoch
        for i, metric in enumerate(self.metrics):
            label = str(metric)
            value = metric.get_value(topics=topics, model=self.model, other_model=self.previous)
            current_metrics[label] = value
            if metric.logger == "visdom":
                # Epoch 0 creates a fresh window per metric; later epochs
                # update the window saved in self.windows[i].
                # NOTE(review): matrix-valued metrics (ndim > 0) are drawn as
                # heatmaps, scalars as line plots — assumes `value` is a numpy
                # type exposing .ndim.
                if epoch == 0:
                    if value.ndim > 0:
                        diff_mat = np.array([value])
                        viz_metric = self.viz.heatmap(
                            X=diff_mat.T, env=metric.viz_env, opts=dict(xlabel='Epochs', ylabel=label, title=label)
                        )
                        # store current epoch's diff diagonal
                        self.diff_mat.put(diff_mat)
                        # saving initial plot window
                        self.windows.append(copy.deepcopy(viz_metric))
                    else:
                        viz_metric = self.viz.line(
                            Y=np.array([value]), X=np.array([epoch]), env=metric.viz_env,
                            opts=dict(xlabel='Epochs', ylabel=label, title=label)
                        )
                        # saving initial plot window
                        self.windows.append(copy.deepcopy(viz_metric))
                else:
                    if value.ndim > 0:
                        # concatenate with previous epoch's diff diagonals
                        diff_mat = np.concatenate((self.diff_mat.get(), np.array([value])))
                        self.viz.heatmap(
                            X=diff_mat.T, env=metric.viz_env, win=self.windows[i],
                            opts=dict(xlabel='Epochs', ylabel=label, title=label)
                        )
                        self.diff_mat.put(diff_mat)
                    else:
                        self.viz.line(
                            Y=np.array([value]), X=np.array([epoch]), env=metric.viz_env,
                            win=self.windows[i], update='append'
                        )
            if metric.logger == "shell":
                statement = "".join(("Epoch ", str(epoch), ": ", label, " estimate: ", str(value)))
                self.log_type.info(statement)

        # check for any metric which need model state from previous epoch
        if any(isinstance(metric, (DiffMetric, ConvergenceMetric)) for metric in self.metrics):
            self.previous = copy.deepcopy(self.model)

        return current_metrics
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import os import oneflow as flow from visdom import Visdom import numpy as np import time # import os # os.system("python -m visdom.server") viz = Visdom() viz.line([[0., 0.]], [0], win='train', opts=dict(title='train-loss&acc', legend=['loss', 'acc'])) viz_val = Visdom() viz_val.line([[0.]], [0], win='val', opts=dict(title='val-acc', legend=['acc'])) def InitNodes(args): if args.num_nodes > 1: assert args.num_nodes <= len(args.node_ips) flow.env.ctrl_port(args.ctrl_port) nodes = [] for ip in args.node_ips[:args.num_nodes]: addr_dict = {} addr_dict["addr"] = ip
import torchvision.transforms as transforms
import AF_1
import AF_2
import AF_3
import Incep
import Hydraplus
import dataload
from torch.autograd import Variable
from visdom import Visdom
import numpy as np

# Module-level Visdom connection plus an initial line window (single seed
# point) that later code can append to via the `win` handle.
viz = Visdom()
win = viz.line(Y=np.array([0.2]), name="1")

import argparse

# Command-line interface: model selection plus per-branch weight paths.
parser = argparse.ArgumentParser()
parser.add_argument('-m', help="choose model", choices=['MNet', 'AF1', 'AF2', 'AF3', 'HP'])
parser.add_argument('-p', help="load weight path", default=None)
parser.add_argument('-mpath', help="load MNet weight path", default=None)
parser.add_argument('-af1path', help="load AF1 weight path", default=None)
parser.add_argument('-af2path', help="load AF2 weight path", default=None)
parser.add_argument('-af3path', help="load AF3 weight path", default=None)
args = parser.parse_args()
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) model = net.SimpleNet(28 * 28, 300, 100, 10) if torch.cuda.is_available(): model = model.cuda() criterion = nn.CrossEntropyLoss() #损失函数 optimizer = optim.SGD(model.parameters(), lr=learning_rate) #随机梯度下降的优化器 epoch = 0 x, y = 0, 0 viz = Visdom(env='my_wind1') win = viz.line(X=np.array([x]), Y=np.array([y]), opts=dict(showlegend=True)) lambda2 = 0.00001 for data in train_loader: l2_regularization = torch.tensor(0.0) img, label = data img = img.view(img.size(0), -1) #batch*28*28 if torch.cuda.is_available(): img = img.cuda() label = label.cuda() else: img = Variable(img) label = Variable(label) out = model(img) loss = criterion(out, label)
def get_c(inflection, b):
    """Recover the sigmoid steepness ``c`` from ``b`` and the inflection point.

    Inverts the relation ``inflection = log(b) / c``, i.e. returns
    ``log(b) / inflection``.
    """
    steepness = math.log(b)
    return steepness / inflection


if __name__ == '__main__':
    from argparse import ArgumentParser

    cli = ArgumentParser()
    cli.add_argument('--env', default='sigmoid_schedule')
    cli.add_argument('--inflection', default=5000, type=int)
    args = cli.parse_args()

    from visdom import Visdom
    viz = Visdom(env=args.env)

    import numpy as np

    # Evaluate each schedule on a grid spanning twice the inflection point.
    Y = np.linspace(1, args.inflection * 2, 1000)

    def plot_schedule(b, c):
        # One Visdom curve per (b, c) pair; the title records the parameters.
        title = 'c=%.g;b=%d;inflection=%d' % (c, b, args.inflection)
        sigmoid = generic_sigmoid(b=b, c=c)
        curve = np.array([sigmoid(i) for i in Y])
        viz.line(curve, Y, opts={'title': title})

    # Sweep b with c derived from the inflection point, then sweep c with b
    # derived from it — same pairs and order as the original two loops.
    for b in (10, 100, 1000):
        plot_schedule(b, get_c(args.inflection, b))
    for c in (0.0001, 0.001, 0.005):
        plot_schedule(get_b(args.inflection, c), c)
import torch import torch.optim as optim import torch.nn as nn from net import hap import random from torchlight.torchlight.io import IO from utils.common import reconstruct_gait, to_multi_hot from utils.visualizations import display_animations from utils import loader from utils import losses from utils.common import * # wsx visdom from visdom import Visdom viz = Visdom(env='taew') viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss')) torch.manual_seed(1234) rec_loss = losses.quat_angle_loss def h5_to_csv(h5file, csv_save_path): f = h5py.File(h5file, 'r') for idx in range(len(f.keys())): a_group_key = list(f.keys())[idx] data = np.array(f[a_group_key]) # Get the data np.savetxt(os.path.join(csv_save_path, a_group_key + '.csv'), data, delimiter=',') # Save as csv def weights_init(m):
def train(load_model_path=None):
    """Train the DPDNN denoiser, plotting loss/PSNR to Visdom and keeping the
    best-PSNR weights on disk.

    Parameters
    ----------
    load_model_path : str, optional
        Path of a state-dict to warm-start from; random Xavier init otherwise.
    """
    train_data = Train_Data()
    train_loader = DataLoader(train_data, opt.batch_size, shuffle=True)
    net = DPDNN()
    net = net.cuda()
    net = nn.DataParallel(net)
    # initialize weights by Xavier
    for layer in net.modules():
        if isinstance(layer, nn.Conv2d):
            nn.init.xavier_uniform_(layer.weight)
    if load_model_path:
        net.load_state_dict(torch.load(load_model_path))
    # save model
    # NOTE(review): this saves the (possibly untrained) initial weights before
    # any training step — presumably to reserve the checkpoint file; confirm.
    torch.save(net.state_dict(), opt.save_model_path)
    criterion = nn.MSELoss()
    criterion = criterion.cuda()
    optimizer = optim.Adam(net.parameters(), lr=opt.lr)

    num_show = 0    # x-coordinate counter for the Visdom curves
    psnr_best = 0   # best validation PSNR seen so far
    # visdom
    vis = Visdom()

    for epoch in range(opt.max_epoch):
        for i, (data, label) in enumerate(train_loader):
            data = data.cuda()
            label = label.cuda()
            optimizer.zero_grad()
            # we only need to train Y channel
            output = net(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()

            if i % 20 == 0:
                # save parameters every 20 batches
                mse_loss, psnr_now, ssim = val(net, epoch, i)
                print('[%d, %5d] loss:%.10f PSNR:%.3f SSIM:%.3f'
                      % (epoch + 1, (i + 1)*opt.batch_size, mse_loss, psnr_now, ssim))
                # visdom: append one point per validation to the two windows
                num_show += 1
                x = torch.Tensor([num_show])
                y1 = torch.Tensor([mse_loss])
                y2 = torch.Tensor([psnr_now])
                vis.line(X=x, Y=y1, win='loss', update='append', opts={'title': 'loss'})
                vis.line(X=x, Y=y2, win='PSNR', update='append', opts={'title': 'PSNR'})
                # Checkpoint only when validation PSNR improves.
                if psnr_best < psnr_now:
                    psnr_best = psnr_now
                    torch.save(net.state_dict(), opt.save_model_path)

        # learning rate decay
        if (epoch+1) % 3 == 0:
            optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * opt.lr_decay
            print('learning rate: ', optimizer.param_groups[0]['lr'])

    print('Finished Training')
def train():
    """Train the actor/critic tracker on ILSVRC-VID with an exploration
    schedule, replay buffer, and Visdom TD-error plotting.
    """
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    trainer = Trainer(ram)
    continue_epi = 121000  # resume from this checkpoint index (0 = fresh run)
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5  # exploration probability, decayed every 10000 steps
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))

    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0  # reward accumulated over the last 100 episodes
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path, ilsvrc_home + '/train')

    for train_step in range(MAX_EPISODES):
        # One episode = one video sequence with its ground-truth boxes.
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]  # initial aspect ratio w/h
        pos = ground_th
        reward_all = 0

        # Initialize the actor on the first frame's ground truth.
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
            img = Image.open(frame_name_list[init_num]).convert('RGB')

        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            # Local and global crops around the current position.
            # NOTE(review): crops appear to be 107x107 RGB — confirm against
            # crop_image_actor_.
            img_crop_l, img_crop_g, _ = crop_image_actor_(np.array(img), pos)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)

            # Actor proposes a box delta for this frame.
            deta_pos = trainer.actor(imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            # Exploration / teacher forcing: with probability `var`, on early
            # frames, and every 15th frame, substitute the ground-truth delta
            # when it is small enough.
            if np.random.random(1) < var or frame <= 5 or frame % 15 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]), np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.1:
                    deta_pos = deta_pos_[0]
            # Suppress large scale changes.
            if deta_pos[2] > 0.05 or deta_pos[2] < -0.05:
                deta_pos[2] = 0
            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(img), pos_)
            # if out_flag:
            #     pos = gt[frame]
            #     continue
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)
            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            gt_frame = gt[frame]

            # Shaped IoU reward: +1 above 0.7, relative improvement in
            # [0.5, 0.7], -1 below 0.5.
            r = _compute_iou(pos_, gt[frame])
            if r > 0.7:
                reward = 1
            elif r >= 0.5 and r <= 0.7:
                gt_pre = gt[frame - 1]
                r_pre = _compute_iou(pos, gt_pre)
                reward = max(0, r - r_pre)
            else:
                reward = -1

            trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos, reward, npBN(imo_crop_l), npBN(imo_l_))
            # if r == 0:
            #     break
            reward_all += reward
            pos = pos_
            # Re-anchor on ground truth when the track left the image or lost
            # all overlap.
            if out_flag or r == 0:
                pos = gt[frame]

        trainer.optimize()
        reward_100 += reward_all
        gc.collect()

        # Periodic reporting: log TD error to file and Visdom every 100 steps.
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = np.array(td_error.cpu().detach().numpy())
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a", encoding='utf-8') as f:
                f.write(message)
            vis.line(X=np.array([train_step]), Y=np.array([y]), win=line_loss, update='append')
            reward_100 = 0
        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
else: start_iter += 1 avg_loss = int(package.get('avg_loss', 0)) loss_results, cer_results, wer_results = package[ 'loss_results'], package['cer_results'], package['wer_results'] if args.visdom and \ package[ 'loss_results'] is not None and start_epoch > 0: # Add previous scores to visdom graph x_axis = epochs[0:start_epoch] y_axis = torch.stack( (loss_results[0:start_epoch], wer_results[0:start_epoch], cer_results[0:start_epoch]), dim=1) viz_window = viz.line( X=x_axis, Y=y_axis, opts=opts, ) if args.tensorboard and \ package[ 'loss_results'] is not None and start_epoch > 0: # Previous scores to tensorboard logs for i in range(start_epoch): values = { 'Avg Train Loss': loss_results[i], 'Avg WER': wer_results[i], 'Avg CER': cer_results[i] } tensorboard_writer.add_scalars(args.id, values, i + 1) else: with open(args.labels_path) as label_file: labels = str(''.join(json.load(label_file)))
class Logger():
    """Console + Visdom training logger for a GAN-style training loop.

    Tracks running per-loss sums within an epoch, writes a one-line progress
    bar with ETA to stdout on every call to :meth:`log`, draws sample images,
    and at the end of each epoch appends the epoch-mean of each loss to its
    own Visdom line window.
    """

    def __init__(self, n_epochs, batches_epoch):
        # n_epochs: total epochs planned; batches_epoch: batches per epoch —
        # both are only used for progress display and epoch rollover.
        self.viz = Visdom()
        self.n_epochs = n_epochs
        self.batches_epoch = batches_epoch
        self.epoch = 1
        self.batch = 1
        self.prev_time = time.time()
        self.mean_period = 0          # accumulated wall time across log() calls
        self.losses = {}              # running sum of each loss this epoch
        self.loss_windows = {}        # loss name -> Visdom window handle
        self.image_windows = {}       # image name -> Visdom window handle
        # FIX: the original list read ["epoch", "batch" "loss_D", ...] — the
        # missing comma made Python concatenate the two strings into a single
        # bogus column "batchloss_D" and silently drop a column.
        self.loss_df = pd.DataFrame(columns=[
            "epoch", "batch", "loss_D", "loss_G", "adversarial_loss",
            "cycle_loss"
        ])

    def log(self, losses=None, images=None):
        """Record one batch: update running sums, print progress/ETA, draw
        images, and flush epoch-mean loss curves at epoch boundaries.

        Parameters
        ----------
        losses : dict mapping loss name to a tensor-like with ``.data[0]``.
        images : dict mapping window title to an image tensor.
        """
        self.mean_period += (time.time() - self.prev_time)
        self.prev_time = time.time()

        sys.stdout.write(
            '\rEpoch %03d/%03d [%04d/%04d] -- ' %
            (self.epoch, self.n_epochs, self.batch, self.batches_epoch))

        """
        if self.batch % 10 == 0:
            temp_df = pd.DataFrame({"epoch": self.epoch, "batch":self.batch, "loss_D": losses["loss_D"].data[0],
                    "loss_G":losses["loss_G"].data[0], "adversarial_loss":losses["loss_G_GAN"].data[0], "cycle_loss":losses["loss_G_cycle"].data[0]})
            self.loss_df = pd.concat([self.loss_df, temp_df], axis=0)
        """

        # Accumulate each loss and print its running epoch-mean.
        # NOTE(review): `.data[0]` is the legacy pre-0.4 PyTorch accessor; on
        # modern PyTorch with 0-dim loss tensors this raises — `.item()` would
        # be the modern equivalent. Left untouched to preserve behavior on the
        # version this was written for.
        for i, loss_name in enumerate(losses.keys()):
            if loss_name not in self.losses:
                self.losses[loss_name] = losses[loss_name].data[0]
            else:
                self.losses[loss_name] += losses[loss_name].data[0]

            if (i + 1) == len(losses.keys()):
                sys.stdout.write(
                    '%s: %.4f -- ' %
                    (loss_name, self.losses[loss_name] / self.batch))
            else:
                sys.stdout.write(
                    '%s: %.4f | ' %
                    (loss_name, self.losses[loss_name] / self.batch))

        # ETA = average time per batch so far * batches remaining.
        batches_done = self.batches_epoch * (self.epoch - 1) + self.batch
        batches_left = self.batches_epoch * (
            self.n_epochs - self.epoch) + self.batches_epoch - self.batch
        sys.stdout.write('ETA: %s' % (datetime.timedelta(
            seconds=batches_left * self.mean_period / batches_done)))

        # Draw images
        for image_name, tensor in images.items():
            if image_name not in self.image_windows:
                self.image_windows[image_name] = self.viz.image(
                    tensor2image(tensor.data), opts={'title': image_name})
            else:
                self.viz.image(tensor2image(tensor.data),
                               win=self.image_windows[image_name],
                               opts={'title': image_name})

        # End of epoch
        if (self.batch % self.batches_epoch) == 0:
            # Plot losses
            for loss_name, loss in self.losses.items():
                if loss_name not in self.loss_windows:
                    self.loss_windows[loss_name] = self.viz.line(
                        X=np.array([self.epoch]),
                        Y=np.array([loss / self.batch]),
                        opts={
                            'xlabel': 'epochs',
                            'ylabel': loss_name,
                            'title': loss_name
                        })
                else:
                    self.viz.line(X=np.array([self.epoch]),
                                  Y=np.array([loss / self.batch]),
                                  win=self.loss_windows[loss_name],
                                  update='append')
                # Reset losses for next epoch
                self.losses[loss_name] = 0.0

            self.epoch += 1
            self.batch = 1
            sys.stdout.write('\n')
        else:
            self.batch += 1
class VisdomLogger(Logger):
    """
    Logger that uses visdom to create learning curves

    Parameters
    ----------
    - env: str, name of the visdom environment
    - log_checkpoints: bool, whether to use checkpoints or epoch
        averages for training loss
    - losses: tuple, names of the different losses that will be plotted.
    - phases: tuple, phases under which losses are reported.
    - server, port: visdom server location.
    - max_y: float or None, optional ceiling used to clip plotted values.
    """
    def __init__(self, env=None, log_checkpoints=True, losses=('loss', ),
                 phases=('train', 'valid'), server='http://localhost',
                 port=8097, max_y=None, **opts):
        # FIX: always define self.viz. Previously, when Visdom was None only a
        # warning was emitted and self.viz was never assigned, so any later
        # access raised AttributeError instead of reading None (the sibling
        # VisdomLogger in this file initializes self.viz = None first).
        self.viz = None
        if Visdom is None:
            warnings.warn("Couldn't import visdom: `pip install visdom`")
        else:
            self.viz = Visdom(server=server, port=port, env=env)
        # One named trace per (phase, loss) pair, e.g. "train.loss".
        self.legend = ['{}.{}'.format(p, l) for p in phases for l in losses]
        opts.update({'legend': self.legend})
        self.opts = opts
        self.env = env
        self.max_y = max_y
        self.log_checkpoints = log_checkpoints
        self.losses = set(losses)
        # Last plotted (X, Y) point per trace, so each update draws a segment.
        self.last = {p: {l: None for l in losses} for p in phases}
        self.pane = self._init_pane()

    @skip_on_import_error(Visdom)
    def _init_pane(self):
        # Seed the pane with NaNs so all traces exist before real data arrives.
        nan = np.array([np.NAN, np.NAN])
        X = np.column_stack([nan] * len(self.legend))
        Y = np.column_stack([nan] * len(self.legend))
        return self.viz.line(X=X, Y=Y, env=self.env, opts=self.opts)

    def _update_last(self, epoch, loss, phase, loss_label):
        # Remember the newest point of this trace for the next segment.
        self.last[phase][loss_label] = {'X': epoch, 'Y': loss}

    def _plot_line(self, X, Y, phase, loss_label):
        # Append the segment (last point -> new point) to the named trace.
        name = "%s.%s" % (phase, loss_label)
        X = np.array([self.last[phase][loss_label]['X'], X])
        Y = np.array([self.last[phase][loss_label]['Y'], Y])
        if self.max_y:
            Y = np.clip(Y, Y.min(), self.max_y)
        self.viz.updateTrace(X=X,
                             Y=Y,
                             name=name,
                             append=True,
                             win=self.pane,
                             env=self.env)

    def _plot_payload(self, epoch, losses, phase):
        # Plot every reported loss we were configured to track; the very
        # first report only primes `last` (no segment to draw yet).
        for label, loss in losses.items():
            if label not in self.losses:
                continue
            if self.last[phase][label] is not None:
                self._plot_line(epoch, loss, phase=phase, loss_label=label)
            self._update_last(epoch, loss, phase, label)

    @skip_on_import_error(Visdom)
    def epoch_end(self, payload):
        if self.log_checkpoints:
            # only use epoch end if checkpoint isn't being used
            return
        losses, epoch = payload['loss'], payload['epoch'] + 1
        self._plot_payload(epoch, losses, 'train')

    @skip_on_import_error(Visdom)
    def validation_end(self, payload):
        losses, epoch = payload['loss'], payload['epoch'] + 1
        self._plot_payload(epoch, losses, 'valid')

    @skip_on_import_error(Visdom)
    def checkpoint(self, payload):
        if not self.log_checkpoints:
            return
        # Fractional epoch position of this checkpoint.
        epoch = payload['epoch'] + payload["batch"] / payload["total_batches"]
        losses = payload['loss']
        self._plot_payload(epoch, losses, 'train')

    @skip_on_import_error(Visdom)
    def attention(self, payload):
        title = "epoch {epoch}/ batch {batch_num}".format(**payload)
        if 'title' in self.opts:
            title = self.opts['title'] + ": " + title
        self.viz.heatmap(X=np.array(payload["att"]),
                         env=self.env,
                         opts={
                             'rownames': payload["hyp"],
                             'columnnames': payload["target"],
                             'title': title
                         })
if is_training: global_step += 1 loss = loss_fn(out, y) loss.backward() opt.step() #if (epoch == (warmup_steps - 1)) and batch_no == (len(loader) - 1): # pass # skip #else: # sched.step(global_step) sched.step() curr_lr = opt.param_groups[0]['lr'] vis.line(X=[steps[name]], Y=[curr_lr], win='lr', name='lr', update='append') avg_loss = rolling_loss[name](loss.item(), steps[name]) iteration[f'{name}_loss'].append(avg_loss) y_pred = out.softmax(dim=1).argmax(dim=1) y_true = batch['site1']['targets'].to(device) acc = (y_pred == y_true).float().mean().item() metric += acc count += len(batch) vis.line(X=[steps[name]], Y=[avg_loss], name=f'{name}_loss', win=f'{name}_loss', update='append',
def main():
    """Train a CNN-LSTM-CTC acoustic model driven by an INI config file.

    Reads data/model/training hyper-parameters from ``args.conf``, builds
    train/dev loaders, runs an accuracy-driven LR-decay training loop,
    plots loss/CER curves to visdom and saves the best checkpoint.
    """
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        # NOTE(review): ConfigParser.read() silently skips missing files and
        # returns the list of parsed paths, so this except rarely fires;
        # consider checking the return value instead.
        cf.read(args.conf)
    except Exception:
        print("conf file not exists")
    logger = init_logger(os.path.join(args.log_dir, 'train_cnn_lstm_ctc.log'))

    # Data options
    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    batch_size = cf.getint("Training", 'batch_size')

    # Data loaders
    train_dataset = myDataset(data_dir, data_set='train',
                              feature_type=feature_type, out_type=out_type,
                              n_feats=n_feats)
    train_loader = myCNNDataLoader(train_dataset, batch_size=batch_size,
                                   shuffle=True, num_workers=4,
                                   pin_memory=False)
    dev_dataset = myDataset(data_dir, data_set="test",
                            feature_type=feature_type, out_type=out_type,
                            n_feats=n_feats)
    dev_loader = myCNNDataLoader(dev_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=4,
                                 pin_memory=False)

    # decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1, blank_index=0)

    # Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    num_class = cf.getint('Model', 'num_class')
    # BUG FIX: this previously read cf.getfloat('Model', 'num_class'), so the
    # dropout probability was silently set to the class count.
    drop_out = cf.getfloat('Model', 'drop_out')
    model = CNN_LSTM_CTC(rnn_input_size=rnn_input_size,
                         rnn_hidden_size=rnn_hidden_size,
                         rnn_layers=rnn_layers,
                         rnn_type=rnn_type, bidirectional=bidirectional,
                         batch_norm=batch_norm, num_class=num_class,
                         drop_out=drop_out)
    # model.apply(xavier_uniform_init)
    print(model.name)

    # Training hyper-parameters
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    try:
        seed = cf.getint('Training', 'seed')
    except Exception:
        # no configured seed: fall back to the CUDA-generated one
        seed = torch.cuda.initial_seed()

    params = {'num_epoches': num_epoches, 'end_adjust_acc': end_adjust_acc,
              'seed': seed, 'decay': decay, 'learning_rate': init_lr,
              'weight_decay': weight_decay, 'batch_size': batch_size,
              'feature_type': feature_type, 'n_feats': n_feats,
              'out_type': out_type}

    if USE_CUDA:
        torch.cuda.manual_seed(seed)
        model = model.cuda()
    print(params)

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr,
                                 weight_decay=weight_decay)

    # visualization for training
    from visdom import Visdom
    viz = Visdom(env='863_corpus')
    title = dataset + ' ' + feature_type + str(n_feats) + ' CNN_LSTM_CTC'
    opts = [dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'),
            dict(title=title + " CER on Train", ylabel='CER', xlabel='Epoch'),
            dict(title=title + ' CER on DEV', ylabel='DEV CER', xlabel='Epoch')]
    viz_window = [None, None, None]

    count = 0
    learning_rate = init_lr
    acc_best = -100          # best dev acc within the current LR stage
    acc_best_true = -100     # best dev acc seen overall
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    # ROBUSTNESS: define before the loop; previously the elif branch below
    # could hit UnboundLocalError on the very first epoch.
    adjust_rate_count = 0
    start_time = time.time()
    loss_results = []
    training_cer_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1
        if adjust_rate_flag:
            # decay the learning rate once, then clear the flag
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay
        print("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate))

        loss = train(model, train_loader, loss_fn, optimizer, logger, print_every=20)
        loss_results.append(loss)
        cer = dev(model, train_loader, decoder, logger)
        print("cer on training set is %.4f" % cer)
        logger.info("cer on training set is %.4f" % cer)
        training_cer_results.append(cer)
        acc = dev(model, dev_loader, decoder, logger)
        dev_cer_results.append(acc)

        # Snapshot model/optimizer state whenever the dev score improves;
        # count stagnant epochs (within +/- end_adjust_acc of the best).
        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0

        print("adjust_rate_count:" + str(adjust_rate_count))
        print('adjust_time:' + str(adjust_time))
        logger.info("adjust_rate_count:" + str(adjust_rate_count))
        logger.info('adjust_time:' + str(adjust_time))

        # After 10 stagnant epochs: schedule an LR decay and roll back to the
        # best state seen so far; stop entirely after 8 decays.
        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)
        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))

        # redraw the three curves (loss, train CER, dev CER)
        x_axis = range(count)
        y_axis = [loss_results[0:count], training_cer_results[0:count],
                  dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X=np.array(x_axis),
                                         Y=np.array(y_axis[x]),
                                         opts=opts[x])
            else:
                viz.line(X=np.array(x_axis), Y=np.array(y_axis[x]),
                         win=viz_window[x], update='replace')

    print("End training, best cv acc is: %.4f" % acc_best)
    logger.info("End training, best cv acc is: %.4f" % acc_best)
    best_path = os.path.join(args.log_dir,
                             'best_model' + '_cv' + str(acc_best) + '.pkl')
    cf.set('Model', 'model_file', best_path)
    # close the config file deterministically (was a leaked handle)
    with open(args.conf, 'w') as conf_file:
        cf.write(conf_file)
    params['epoch'] = count
    torch.save(CNN_LSTM_CTC.save_package(model, optimizer=optimizer,
                                         epoch=params,
                                         loss_results=loss_results,
                                         training_cer_results=training_cer_results,
                                         dev_cer_results=dev_cer_results),
               best_path)
class EvaluationMonitor(Wrapper):
    """Environment wrapper used during evaluation runs.

    Accumulates reward and episode counts, and every ``eval_steps`` steps
    plots the mean episodic reward to visdom (when plotting is enabled),
    prints a colored summary, and resets its counters.
    """

    def __init__(self, env, cmdl):
        super(EvaluationMonitor, self).__init__(env)
        self.freq = cmdl.eval_freq  # in steps
        self.eval_steps = cmdl.evaluator.eval_steps
        self.cmdl = cmdl
        # counters reset after every evaluation window
        self.crt_step = 0
        self.step_cnt = 0
        self.ep_cnt = 0
        self.total_rw = 0
        self.max_mean_rw = -100
        if self.cmdl.display_plots:
            self.vis = Visdom()
            caption = "Episodic reward per %d steps." % self.eval_steps
            self.plot = self.vis.line(
                Y=np.array([0]),
                X=np.array([0]),
                opts=dict(title=cmdl.label, caption=caption)
            )

    def get_crt_step(self, crt_step):
        """Record the training step this evaluation corresponds to."""
        self.crt_step = crt_step

    def _reset_monitor(self):
        # wipe the per-evaluation counters
        self.step_cnt = 0
        self.ep_cnt = 0
        self.total_rw = 0

    def _step(self, action):
        obs, rw, done, info = self.env.step(action)
        done = self._after_step(obs, rw, done, info)
        return obs, rw, done, info

    def _reset(self):
        obs = self.env.reset()
        self._after_reset(obs)
        return obs

    def _after_step(self, o, r, done, info):
        self.total_rw += r
        self.step_cnt += 1
        # end of the evaluation window: report and start over
        if self.step_cnt == self.eval_steps:
            self._update_plot()
            self._reset_monitor()
        return done

    def _after_reset(self, observation):
        self.ep_cnt += 1
        # print("[%2d][%4d] RESET" % (self.ep_cnt, self.step_cnt))

    def _update_plot(self):
        """Plot and print the mean episodic reward for this window."""
        mean_rw = self.total_rw / self.ep_cnt
        # highlight (magenta) when this window set a new record
        if mean_rw > self.max_mean_rw:
            bg_color = 'on_magenta'
            self.max_mean_rw = mean_rw
        else:
            bg_color = 'on_blue'
        if self.cmdl.display_plots:
            self.vis.line(
                X=np.array([self.crt_step]),
                Y=np.array([mean_rw]),
                win=self.plot,
                update='append'
            )
        header = clr("[Evaluator] done in %5d steps. " % self.step_cnt,
                     'grey', 'on_white')
        result = clr(" rw/ep=%3.2f " % mean_rw, 'white', bg_color)
        print(header + result)
xtickstep=1, ytickmin=-0, ytickmax=500, ytickstep=1, markersymbol='dot', markercolor=np.random.randint(0, 255, ( num_data, 3, )), ), ) # loss x = np.reshape([i for i in range(num_epoch)], newshape=[num_epoch, 1]) loss_data = np.reshape(loss_arr, newshape=[num_epoch, 1]) win3 = viz.line( X=x, Y=loss_data, opts=dict( xtickmin=0, xtickmax=num_epoch, xtickstep=1, ytickmin=0, ytickmax=20, ytickstep=1, markercolor=np.random.randint(0, 255, num_epoch), ), )
import torchvision.datasets as datasets import torchvision.models as models import argparse parser = argparse.ArgumentParser(description='selfNetArc') parser.add_argument('--out_person_num', type=int, required=False, default=0) args = parser.parse_args() from MPIIGazeData import MPIIGazeDataset from modifiedITrackerModel import modifiedITrackerModel from visdom import Visdom vis = Visdom() trainline = vis.line(Y=np.array([0])) testline = vis.line(Y=np.array([0])) ''' Train/test code for iTracker. Author: Petr Kellnhofer ( pkel_lnho (at) gmai_l.com // remove underscores and spaces), 2018. Website: http://gazecapture.csail.mit.edu/ Cite: Eye Tracking for Everyone K.Krafka*, A. Khosla*, P. Kellnhofer, H. Kannan, S. Bhandarkar, W. Matusik and A. Torralba IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016 @inproceedings{cvpr2016_gazecapture,
from visdom import Visdom ####单个线条 #实例化 # viz=Visdom() # #创建一个窗口trian loss # # Y, x , ID(defult窗口=main),其他参数,窗口名称 # viz.line([0.],[0.],win='train_loss',opts=dict(title='train loss')) # # 直接数据 x坐标代表时间戳 添加方式 # viz.line([loss.item()], [global_step], win ='train_loss', update='append') ########lines:multi-traces######### viz=Visdom() #创建一个窗口trian loss # Y, x , ID(defult窗口=main),其他参数,legend,y1和y2的 viz.line([[0.0, 0.0]],[0.],win='test',opts=dict(title='test loss&acc', legend=['loss','acc.'])) # 直接数据 x坐标代表时间戳 添加方式 viz.line([[test_loss, correct / len(test_loader.dataset)]], [global_step], win ='test', update='append') ########visual X#################### viz=Visdom() viz.images(data.view(-1,1,28,28),win='x') viz.test(str(pred.detach().cpu().numpy()),win='pred', opts=dict(title='pred')
class Visual_loss(object):
    """Lazy visdom line-plot helper.

    Each ``show_*`` method creates its window on the first call (seeded with
    the first data point) and appends points on every later call. Separate
    flags track which windows exist.
    """

    def __init__(self, win_name='default'):
        self.win_name = win_name
        self.viz = Visdom(env=self.win_name)
        # lazy-creation flags, one per window group
        self.initialize_ = False
        self.initial_pn = False
        self.initial_pn_val = False
        self.initial_acc_flag = False
        self.initial_acc_flag_val = False

    def initialize(self, total_loss, classify_loss=0, regression_loss=0, X=0):
        """Create the total-loss window with its first point."""
        self.loss_win = self.viz.line(Y=np.array([total_loss]),
                                      X=np.array([X]),
                                      opts=dict(title='loss_value'))

    def show_loss_curve(self, total_loss, classify_loss=0,
                        regression_loss=0, X=0):
        """Plot one total-loss point, creating the window lazily."""
        if self.initialize_:
            self.viz.line(Y=np.array([total_loss]), X=np.array([X]),
                          win=self.loss_win, update='append')
            return
        self.initialize_ = True
        self.initialize(total_loss, classify_loss, regression_loss, X)

    def initial_pos_neg(self, tpr, tnr, epoch):
        """Create the train TPR/TNR windows with their first points."""
        self.tpr_win = self.viz.line(X=np.array([epoch]), Y=np.array([tpr]),
                                     opts=dict(title='tpr'))
        self.tnr_win = self.viz.line(X=np.array([epoch]), Y=np.array([tnr]),
                                     opts=dict(title='tnr'))

    def show_pos_neg_curve(self, tpr, tnr, epoch):
        """Plot one train TPR/TNR point, creating the windows lazily."""
        if self.initial_pn:
            self.viz.line(X=np.array([epoch]), Y=np.array([tpr]),
                          win=self.tpr_win, update='append')
            self.viz.line(X=np.array([epoch]), Y=np.array([tnr]),
                          win=self.tnr_win, update='append')
            return
        self.initial_pn = True
        self.initial_pos_neg(tpr, tnr, epoch)

    def initial_pos_neg_val(self, tpr, tnr, epoch):
        """Create the validation TPR/TNR windows with their first points."""
        self.tpr_win_val = self.viz.line(X=np.array([epoch]),
                                         Y=np.array([tpr]),
                                         opts=dict(title='tpr_val'))
        self.tnr_win_val = self.viz.line(X=np.array([epoch]),
                                         Y=np.array([tnr]),
                                         opts=dict(title='tnr_val'))

    def show_pos_neg_curve_val(self, tpr, tnr, epoch):
        """Plot one validation TPR/TNR point, creating the windows lazily."""
        if self.initial_pn_val:
            self.viz.line(X=np.array([epoch]), Y=np.array([tpr]),
                          win=self.tpr_win_val, update='append')
            self.viz.line(X=np.array([epoch]), Y=np.array([tnr]),
                          win=self.tnr_win_val, update='append')
            return
        self.initial_pn_val = True
        self.initial_pos_neg_val(tpr, tnr, epoch)

    def initial_acc(self, acc, epoch, title_name):
        """Create the train accuracy window with its first point."""
        self.acc_win = self.viz.line(X=np.array([epoch]), Y=np.array([acc]),
                                     opts=dict(title=title_name))

    def show_acc(self, acc, epoch, title_name, val=False):
        """Plot one train accuracy point, creating the window lazily.

        Note: ``val`` is accepted for signature compatibility but unused.
        """
        if self.initial_acc_flag:
            self.viz.line(X=np.array([epoch]), Y=np.array([acc]),
                          win=self.acc_win, update="append")
            return
        self.initial_acc_flag = True
        self.initial_acc(acc, epoch, title_name)

    def initial_acc_val(self, acc, epoch, title_name):
        """Create the validation accuracy window with its first point."""
        self.acc_win_val = self.viz.line(X=np.array([epoch]),
                                         Y=np.array([acc]),
                                         opts=dict(title=title_name))

    def show_acc_val(self, acc, epoch, title_name):
        """Plot one validation accuracy point, creating the window lazily."""
        if self.initial_acc_flag_val:
            self.viz.line(X=np.array([epoch]), Y=np.array([acc]),
                          win=self.acc_win_val, update="append")
            return
        self.initial_acc_flag_val = True
        self.initial_acc_val(acc, epoch, title_name)

    def show_dice(self, metric, step, title_name):
        """Alias: plot a dice metric on the train accuracy window."""
        self.show_acc(metric, step, title_name)

    def show_dice_val(self, metric, step, title_name):
        """Alias: plot a dice metric on the validation accuracy window."""
        self.show_acc_val(metric, step, title_name)
class Solver(object):
    """Training driver for a CTC/attention speech model.

    Runs epoch-based training with cross-validation after every epoch,
    optional learning-rate halving with early stopping, per-epoch
    checkpointing, best-model saving, and visdom loss curves.
    """
    def __init__(self, data, model, optimizer, args):
        self.tr_loader = data['tr_loader']
        self.cv_loader = data['cv_loader']
        self.model = model
        self.optimizer = optimizer
        self.ctc_loss = CTCLoss(size_average=True)
        # Low frame rate feature
        self.LFR_m = args.LFR_m
        self.LFR_n = args.LFR_n
        # Training config
        self.epochs = args.epochs
        self.half_lr = args.half_lr
        self.early_stop = args.early_stop
        self.max_norm = args.max_norm
        self.ctc_trun = args.ctc_trun
        self.align_trun = args.align_trun
        self.half_lr_epoch = args.half_lr_epoch
        # save and load model
        self.save_folder = args.save_folder
        self.checkpoint = args.checkpoint
        self.continue_from = args.continue_from
        self.model_path = args.model_path
        # logging
        self.print_freq = args.print_freq
        # visualizing loss using visdom
        # per-epoch loss histories (pre-allocated, filled as epochs finish)
        self.tr_loss = torch.Tensor(self.epochs)
        self.cv_loss = torch.Tensor(self.epochs)
        self.visdom = args.visdom
        self.visdom_id = args.visdom_id
        if self.visdom:
            from visdom import Visdom
            self.vis = Visdom(env=self.visdom_id)
            self.vis_opts = dict(title=self.visdom_id,
                                 ylabel='Loss', xlabel='Epoch',
                                 legend=['train loss', 'cv loss'])
            self.vis_window = None
            self.vis_epochs = torch.arange(1, self.epochs + 1)
        self._reset()

    def _reset(self):
        """Initialize training state, optionally resuming from a checkpoint."""
        if self.continue_from:
            print('Loading checkpoint model %s' % self.continue_from)
            package = torch.load(self.continue_from)
            self.model.load_state_dict(package['state_dict'])
            self.optimizer.load_state_dict(package['optim_dict'])
            self.start_epoch = int(package.get('epoch', 1))
            # restore the loss histories up to the resumed epoch
            self.tr_loss[:self.start_epoch] = package['tr_loss'][:self.start_epoch]
            self.cv_loss[:self.start_epoch] = package['cv_loss'][:self.start_epoch]
        else:
            self.start_epoch = 0
        # Create save folder
        os.makedirs(self.save_folder, exist_ok=True)
        self.prev_val_loss = float("inf")
        self.best_val_loss = float("inf")
        self.halving = False

    def train(self):
        """Run the full multi-epoch train / cross-validate loop."""
        for epoch in range(self.start_epoch, self.epochs):
            # Train one epoch
            print("Training...")
            self.model.train()  # Turn on BatchNorm & Dropout
            start = time.time()
            tr_avg_loss = self._run_one_epoch(epoch)
            print('-' * 85)
            print('Train Summary | End of Epoch {0} | Time {1:.2f}s | '
                  'Train Loss {2:.3f}'.format(
                      epoch + 1, time.time() - start, tr_avg_loss))
            print('-' * 85)
            # Save model each epoch
            if self.checkpoint:
                file_path = os.path.join(
                    self.save_folder, 'epoch%d.pth.tar' % (epoch + 1))
                torch.save(self.model.serialize(self.model, self.optimizer,
                                                epoch + 1,
                                                self.LFR_m, self.LFR_n,
                                                tr_loss=self.tr_loss,
                                                cv_loss=self.cv_loss),
                           file_path)
                print('Saving checkpoint model to %s' % file_path)
            # Cross validation
            print('Cross validation...')
            self.model.eval()  # Turn off Batchnorm & Dropout
            val_loss = self._run_one_epoch(epoch, cross_valid=True)
            print('-' * 85)
            print('Valid Summary | End of Epoch {0} | Time {1:.2f}s | '
                  'Valid Loss {2:.3f}'.format(
                      epoch + 1, time.time() - start, val_loss))
            print('-' * 85)
            # Adjust learning rate (halving): once past half_lr_epoch the LR
            # is halved every epoch; with early_stop, the second trip through
            # this branch terminates training.
            if self.half_lr and epoch >= self.half_lr_epoch:
                if self.early_stop and self.halving:
                    print("Already start halving learing rate, it still gets "
                          "too small imporvement, stop training early.")
                    break
                self.halving = True
            if self.halving:
                optim_state = self.optimizer.state_dict()
                optim_state['param_groups'][0]['lr'] = \
                    optim_state['param_groups'][0]['lr'] / 2.0
                self.optimizer.load_state_dict(optim_state)
                print('Learning rate adjusted to: {lr:.6f}'.format(
                    lr=optim_state['param_groups'][0]['lr']))
            self.prev_val_loss = val_loss
            # Save the best model
            self.tr_loss[epoch] = tr_avg_loss
            self.cv_loss[epoch] = val_loss
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                file_path = os.path.join(self.save_folder, self.model_path)
                torch.save(self.model.serialize(self.model, self.optimizer,
                                                epoch + 1,
                                                self.LFR_m, self.LFR_n,
                                                tr_loss=self.tr_loss,
                                                cv_loss=self.cv_loss),
                           file_path)
                print("Find better validated model, saving to %s" % file_path)
            # visualizing loss using visdom
            if self.visdom:
                x_axis = self.vis_epochs[0:epoch + 1]
                y_axis = torch.stack(
                    (self.tr_loss[0:epoch + 1], self.cv_loss[0:epoch + 1]),
                    dim=1)
                if self.vis_window is None:
                    self.vis_window = self.vis.line(
                        X=x_axis,
                        Y=y_axis,
                        opts=self.vis_opts,
                    )
                else:
                    self.vis.line(
                        X=x_axis.unsqueeze(0).expand(y_axis.size(
                            1), x_axis.size(0)).transpose(0, 1),  # Visdom fix
                        Y=y_axis,
                        win=self.vis_window,
                        update='replace',
                    )

    def _run_one_epoch(self, epoch, cross_valid=False):
        """Run one pass over the train (or, with cross_valid=True, the CV)
        loader and return the average loss.

        NOTE(review): returns ``total_loss / (i + 1)`` using the loop
        variable, so an empty data loader raises NameError — confirm loaders
        are never empty.
        """
        start = time.time()
        total_loss = 0
        total_wer = []
        data_loader = self.tr_loader if not cross_valid else self.cv_loader
        # visualizing loss using visdom
        if self.visdom and not cross_valid:
            vis_opts_epoch = dict(title=self.visdom_id + " epoch " + str(epoch),
                                  ylabel='Loss', xlabel='Epoch')
            vis_window_epoch = None
            vis_iters = torch.arange(1, len(data_loader) + 1)
            vis_iters_loss = torch.Tensor(len(data_loader))
        for i, (data) in enumerate(data_loader):
            padded_input, input_lengths, padded_target, output_lengths, aligns = data
            padded_input = padded_input.cuda()
            input_lengths = input_lengths.cuda()
            padded_target = padded_target.cuda()
            output_lengths = output_lengths.cuda()
            aligns = aligns.cuda()
            # three forward variants: CTC truncation returns a WER as well;
            # the other two only return the loss
            if self.ctc_trun:
                loss, batch_wer = self.model(padded_input, input_lengths,
                                             padded_target, output_lengths)
            elif self.align_trun:
                loss = self.model(padded_input, input_lengths, padded_target,
                                  i, epoch, aligns)
                batch_wer = [0]
            else:
                loss = self.model(padded_input, input_lengths, padded_target,
                                  i, epoch)
                batch_wer = [0]
            if not cross_valid:
                self.optimizer.zero_grad()
                loss.backward()
                # clip gradients to stabilize training
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    self.model.parameters(), self.max_norm)
                self.optimizer.step()
            total_loss += loss.item()
            total_wer.extend(batch_wer)
            if i % self.print_freq == 0:
                print('Epoch {0} | Iter {1} | Average Loss {2:.3f} | '
                      'Current Loss {3:.6f} | Total Wer {4:.6f} | '
                      'Current Wer {5:.6f}| {6:.1f} ms/batch'.format(
                          epoch + 1, i + 1, total_loss / (i + 1),
                          loss.item(), sum(total_wer) / len(total_wer),
                          batch_wer[0],
                          1000 * (time.time() - start) / (i + 1)),
                      flush=True)
            # visualizing loss using visdom
            if self.visdom and not cross_valid:
                vis_iters_loss[i] = loss.item()
                if i % self.print_freq == 0:
                    x_axis = vis_iters[:i+1]
                    y_axis = vis_iters_loss[:i+1]
                    if vis_window_epoch is None:
                        vis_window_epoch = self.vis.line(
                            X=x_axis, Y=y_axis, opts=vis_opts_epoch)
                    else:
                        self.vis.line(X=x_axis, Y=y_axis,
                                      win=vis_window_epoch, update='replace')
        return total_loss / (i + 1)
class VisdomWriter:
    """TensorboardX-style summary writer backed by a visdom server.

    Mirrors the ``SummaryWriter`` API (add_scalar, add_image, ...) but sends
    everything to visdom. Every logging method is wrapped by the module's
    ``_check_connection`` decorator (defined elsewhere in this file).
    """
    def __init__(self):
        try:
            from visdom import Visdom
        except ImportError:
            raise ImportError(
                "Visdom visualization requires installation of Visdom")
        self.scalar_dict = {}
        self.server_connected = False
        self.vis = Visdom()
        self.windows = {}
        self._try_connect()

    def _try_connect(self):
        """Poll the visdom server for up to ~1 second; assert on failure."""
        startup_sec = 1
        self.server_connected = self.vis.check_connection()
        while not self.server_connected and startup_sec > 0:
            time.sleep(0.1)
            startup_sec -= 0.1
            self.server_connected = self.vis.check_connection()
        assert self.server_connected, 'No connection could be formed quickly'

    @_check_connection
    def add_scalar(self, tag, scalar_value, global_step=None,
                   main_tag='default'):
        """Add scalar data to Visdom. Plots the values in a plot titled
           {main_tag}-{tag}.

        Args:
            tag (string): Data identifier
            scalar_value (float or string/blobname): Value to save
            global_step (int): Global step value to record
            main_tag (string): Data group identifier
        """
        if self.scalar_dict.get(main_tag) is None:
            self.scalar_dict[main_tag] = {}
        exists = self.scalar_dict[main_tag].get(tag) is not None
        self.scalar_dict[main_tag][tag] = self.scalar_dict[main_tag][tag] + \
            [scalar_value] if exists else [scalar_value]
        plot_name = '{}-{}'.format(main_tag, tag)
        # If there is no global_step provided, follow sequential order
        # NOTE(review): `not global_step` also treats global_step=0 as
        # missing, so step 0 falls back to the sequential index.
        x_val = len(self.scalar_dict[main_tag]
                    [tag]) if not global_step else global_step
        if exists:
            # Update our existing Visdom window
            self.vis.line(
                X=make_np(x_val),
                Y=make_np(scalar_value),
                name=plot_name,
                update='append',
                win=self.windows[plot_name],
            )
        else:
            # Save the window if we are creating this graph for the first time
            self.windows[plot_name] = self.vis.line(
                X=make_np(x_val),
                Y=make_np(scalar_value),
                name=plot_name,
                opts={
                    'title': plot_name,
                    'xlabel': 'timestep',
                    'ylabel': tag,
                },
            )

    @_check_connection
    def add_scalars(self, main_tag, tag_scalar_dict, global_step=None):
        """Adds many scalar data to summary.

        Note that this function also keeps logged scalars in memory.
        In extreme case it explodes your RAM.

        Args:
            tag (string): Data identifier
            main_tag (string): Data group identifier
            tag_scalar_dict (dict): Key-value pair storing the tag and
                corresponding values
            global_step (int): Global step value to record

        Examples::

            writer.add_scalars('run_14h',{'xsinx':i*np.sin(i/r),
                                          'xcosx':i*np.cos(i/r),
                                          'arctanx': numsteps*np.arctan(i/r)}, i)
            This function adds three plots:
                'run_14h-xsinx',
                'run_14h-xcosx',
                'run_14h-arctanx'
            with the corresponding values.
        """
        for key in tag_scalar_dict.keys():
            self.add_scalar(key, tag_scalar_dict[key], global_step, main_tag)

    @_check_connection
    def export_scalars_to_json(self, path):
        """Exports to the given 'path' an ASCII file containing all the
        scalars written so far by this instance, with the following format:
        {writer_id : [[timestamp, step, value], ...], ...}

        The scalars saved by ``add_scalars()`` will be flushed after export.
        """
        with open(path, "w") as f:
            json.dump(self.scalar_dict, f)
        self.scalar_dict = {}

    @_check_connection
    def add_histogram(self, tag, values, global_step=None, bins='tensorflow'):
        """Add histogram to summary.

        Args:
            tag (string): Data identifier
            values (torch.Tensor, numpy.array, or string/blobname): Values to
                build histogram
            global_step (int): Global step value to record
            bins (string): one of {'tensorflow', 'auto', 'fd', ...}, this
                determines how the bins are made. You can find other options in:
                https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
        """
        # NOTE(review): make_np is applied twice here (harmless but
        # redundant); `bins` and `global_step` are currently unused.
        values = make_np(values)
        self.vis.histogram(make_np(values), opts={'title': tag})

    @_check_connection
    def add_image(self, tag, img_tensor, global_step=None, caption=None):
        """Add image data to summary.

        Note that this requires the ``pillow`` package.

        Args:
            tag (string): Data identifier
            img_tensor (torch.Tensor, numpy.array, or string/blobname): Image
                data
            global_step (int): Global step value to record
        Shape:
            img_tensor: :math:`(C, H, W)`. Use ``torchvision.utils.make_grid()``
            to prepare it is a good idea.
            C = colors (can be 1 - grayscale, 3 - RGB, 4 - RGBA)
        """
        img_tensor = make_np(img_tensor)
        self.vis.image(img_tensor, opts={'title': tag, 'caption': caption})

    @_check_connection
    def add_figure(self, tag, figure, global_step=None, close=True):
        """Render matplotlib figure into an image and add it to summary.

        Note that this requires the ``matplotlib`` package.

        Args:
            tag (string): Data identifier
            figure (matplotlib.pyplot.figure) or list of figures: figure or a
                list of figures
            global_step (int): Global step value to record
            close (bool): Flag to automatically close the figure
        """
        self.add_image(tag, figure_to_image(figure, close), global_step)

    @_check_connection
    def add_video(self, tag, vid_tensor, global_step=None, fps=4):
        """Add video data to summary.

        Note that this requires the ``moviepy`` package.

        Args:
            tag (string): Data identifier
            vid_tensor (torch.Tensor): Video data
            global_step (int): Global step value to record
            fps (float or int): Frames per second
        Shape:
            vid_tensor: :math:`(B, C, T, H, W)`. (if following tensorboardX
            format)
            vid_tensor: :math:`(T, H, W, C)`. (if following visdom format)
            B = batches, C = colors (1, 3, or 4), T = time frames,
            H = height, W = width
        """
        shape = vid_tensor.shape
        # A batch of videos (tensorboardX format) is a 5D tensor
        if len(shape) > 4:
            for i in range(shape[0]):
                # Reshape each video to Visdom's (T x H x W x C) and write
                # each video
                # TODO: reverse the logic here, shoudl do the permutation in
                # numpy
                if isinstance(vid_tensor, np.ndarray):
                    import torch
                    ind_vid = torch.from_numpy(
                        vid_tensor[i, :, :, :, :]).permute(1, 2, 3, 0)
                else:
                    ind_vid = vid_tensor[i, :, :, :, :].permute(1, 2, 3, 0)
                # heuristic: values in (0, 1) are assumed normalized and
                # rescaled to 0-255
                scale_factor = 255 if np.any((ind_vid > 0) & (ind_vid < 1)) else 1
                # Visdom looks for .ndim attr, this is something raw Tensors
                # don't have
                # Cast to Numpy array to get .ndim attr
                ind_vid = ind_vid.numpy()
                ind_vid = (ind_vid * scale_factor).astype(np.uint8)
                assert ind_vid.shape[3] in [1, 3, 4], \
                    'Visdom requires the last dimension to be color, which can be 1 (grayscale), 3 (RGB) or 4 (RGBA)'
                self.vis.video(tensor=ind_vid, opts={'fps': fps})
        else:
            self.vis.video(tensor=vid_tensor, opts={'fps': fps})

    @_check_connection
    def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100):
        """Add audio data to summary.

        Args:
            tag (string): Data identifier
            snd_tensor (torch.Tensor, numpy.array, or string/blobname): Sound
                data
            global_step (int): Global step value to record
            sample_rate (int): sample rate in Hz

        Shape:
            snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1].
        """
        snd_tensor = make_np(snd_tensor)
        self.vis.audio(tensor=snd_tensor,
                       opts={'sample_frequency': sample_rate})

    @_check_connection
    def add_text(self, tag, text_string, global_step=None):
        """Add text data to summary.

        Args:
            tag (string): Data identifier
            text_string (string): String to save
            global_step (int): Global step value to record

        Examples::

            writer.add_text('lstm', 'This is an lstm', 0)
            writer.add_text('rnn', 'This is an rnn', 10)
        """
        if text_string is None:
            # Visdom doesn't support tags, write the tag as the text_string
            text_string = tag
        self.vis.text(text_string)

    @_check_connection
    def add_onnx_graph(self, prototxt):
        # TODO: Visdom doesn't support graph visualization yet, so this is a
        # no-op
        return

    @_check_connection
    def add_graph(self, model, input_to_model=None, verbose=False, **kwargs):
        # TODO: Visdom doesn't support graph visualization yet, so this is a
        # no-op
        return

    @_check_connection
    def add_embedding(self, mat, metadata=None, label_img=None,
                      global_step=None, tag='default', metadata_header=None):
        # TODO: Visdom doesn't support embeddings yet, so this is a no-op
        return

    @_check_connection
    def add_pr_curve(self, tag, labels, predictions, global_step=None,
                     num_thresholds=127, weights=None):
        """Adds precision recall curve.

        Args:
            tag (string): Data identifier
            labels (torch.Tensor, numpy.array, or string/blobname): Ground
                truth data. Binary label for each element.
            predictions (torch.Tensor, numpy.array, or string/blobname):
                The probability that an element be classified as true. Value
                should in [0, 1]
            global_step (int): Global step value to record
            num_thresholds (int): Number of thresholds used to draw the curve.
        """
        labels, predictions = make_np(labels), make_np(predictions)
        raw_data = compute_curve(labels, predictions, num_thresholds, weights)
        # compute_curve returns np.stack((tp, fp, tn, fn, precision, recall))
        # We want to access 'precision' and 'recall'
        precision, recall = raw_data[4, :], raw_data[5, :]
        self.vis.line(
            X=recall,
            Y=precision,
            name=tag,
            opts={
                'title': 'PR Curve for {}'.format(tag),
                'xlabel': 'recall',
                'ylabel': 'precision',
            },
        )

    @_check_connection
    def add_pr_curve_raw(self, tag, true_positive_counts,
                         false_positive_counts,
                         true_negative_counts,
                         false_negative_counts,
                         precision,
                         recall, global_step=None,
                         num_thresholds=127,
                         weights=None):
        """Adds precision recall curve with raw data.

        Args:
            tag (string): Data identifier
            true_positive_counts (torch.Tensor, numpy.array, or
                string/blobname): true positive counts
            false_positive_counts (torch.Tensor, numpy.array, or
                string/blobname): false positive counts
            true_negative_counts (torch.Tensor, numpy.array, or
                string/blobname): true negative counts
            false_negative_counts (torch.Tensor, numpy.array, or
                string/blobname): false negative counts
            precision (torch.Tensor, numpy.array, or string/blobname):
                precision
            recall (torch.Tensor, numpy.array, or string/blobname): recall
            global_step (int): Global step value to record
            num_thresholds (int): Number of thresholds used to draw the curve.
            see: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/pr_curve/README.md
        """
        # only precision/recall are plotted; the raw counts are accepted for
        # API compatibility
        precision, recall = make_np(precision), make_np(recall)
        self.vis.line(
            X=recall,
            Y=precision,
            name=tag,
            opts={
                'title': 'PR Curve for {}'.format(tag),
                'xlabel': 'recall',
                'ylabel': 'precision',
            },
        )

    def close(self):
        """Drop the visdom handle and buffered scalars, then force a GC."""
        del self.vis
        del self.scalar_dict
        gc.collect()
def train(config):
    """Train the segmentation model (and, unless ``config.segmentation_only``
    is set, the downstream classifier), checkpointing the best train loss and
    optionally plotting progress to a running Visdom server.

    ``config`` must provide: segmentation_only, data_dir, data_size,
    batch_size, model_name, out_channels, model_prefix, out_dir, lr, device,
    use_visdom, epochs, image_intervals, model_intervals.
    """
    # choose dataset for different training purpose
    if config.segmentation_only:
        image_datasets = {phase: SegDataLoader(data_dir=config.data_dir,
                                               phase=phase,
                                               data_size=config.data_size)
                          for phase in ['train', 'eval']}
    else:
        image_datasets = {phase: ClsDataLoader(data_dir=config.data_dir,
                                               phase=phase,
                                               data_size=config.data_size)
                          for phase in ['train', 'eval']}
    print('loading dataset: train: {}, eval: {}'
          .format(len(image_datasets['train']), len(image_datasets['eval'])))
    dataset_loaders = {'train': data.DataLoader(image_datasets['train'],
                                                batch_size=config.batch_size,
                                                shuffle=True,
                                                num_workers=4),
                       'eval': data.DataLoader(image_datasets['eval'],
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=0)
                       }
    model, classifier = model_choice(config.model_name, config.out_channels)

    if config.model_prefix > 0:
        # Resume from a checkpoint named after its epoch number.
        model_file = os.path.join(config.out_dir, 'models',
                                  str(config.model_prefix) + '.pt')
        assert os.path.exists(model_file), \
            'pretrained model file ({}) does not exist, please check'.format(model_file)
        checkpoint = torch.load(model_file, map_location='cpu')
        try:
            model.load_state_dict(checkpoint['seg'], strict=False)
            opt1 = torch.optim.Adam(model.parameters(), lr=checkpoint['lr1'])
            classifier.load_state_dict(checkpoint['cls'], strict=False)
            opt2 = torch.optim.Adam(classifier.parameters(), lr=config.lr)
            print('loading checkpoint from {}'.format(str(config.model_prefix) + '.pt'))
            print('loss: {}'.format(checkpoint['loss']))
        except KeyError:
            # Checkpoint is missing one of the expected keys: fall back to
            # fresh optimizers.
            # BUG FIX: the original branch only rebuilt opt2, which left opt1
            # undefined and crashed with a NameError at the
            # ExponentialLR(opt1, ...) construction below.
            opt1 = torch.optim.Adam(model.parameters(), lr=config.lr)
            opt2 = torch.optim.Adam(classifier.parameters(), lr=config.lr)
    else:
        opt1 = torch.optim.Adam(model.parameters(), lr=config.lr)
        opt2 = torch.optim.Adam(classifier.parameters(), lr=config.lr)
    lr_scheduler_1 = torch.optim.lr_scheduler.ExponentialLR(opt1, gamma=0.99)
    lr_scheduler_2 = torch.optim.lr_scheduler.ExponentialLR(opt2, gamma=0.9)
    CELoss = nn.CrossEntropyLoss()
    print('running on {}'.format(config.device))

    # set visdom
    if config.use_visdom:
        viz = Visdom()
        assert viz.check_connection()
        visline1 = viz.line(
            X=torch.Tensor([1]).cpu() * config.model_prefix,
            Y=torch.Tensor([0]).cpu(),
            win=1,
            opts=dict(xlabel='epochs',
                      ylabel='loss',
                      title='training loss',
                      )
        )
        visline2 = viz.line(
            X=torch.Tensor([1]).cpu() * config.model_prefix,
            Y=torch.Tensor([0]).cpu(),
            win=2,
            opts=dict(xlabel='epochs',
                      ylabel='loss',
                      title='evaluation loss',
                      )
        )
        visline3 = viz.line(
            X=torch.Tensor([1]).cpu() * config.model_prefix,
            Y=torch.Tensor([0]).cpu(),
            win=3,
            opts=dict(xlabel='epochs',
                      ylabel='LR',
                      title='Learning rate')
        )
    model = model.to(device=config.device)
    classifier = classifier.to(device=config.device)
    global_steps = {'train': 0, 'eval': 0}
    global min_loss
    min_loss = 1e5
    for epoch in range(config.model_prefix, config.epochs):
        lr_scheduler_1.step()
        lr_scheduler_2.step()
        for phase in ['train', 'eval']:
            running_loss = []
            if phase == 'train':
                model.train()
                classifier.train()
            else:
                model.eval()
                classifier.eval()
            for i, (images, labels) in enumerate(dataset_loaders[phase]):
                start = time.time()
                images = images.to(device=config.device)
                labels = labels.to(device=config.device)
                opt1.zero_grad()
                opt2.zero_grad()
                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(images)
                    outputs_mask = torch.argmax(outputs, dim=1, keepdim=False)
                    if config.segmentation_only:
                        loss = CELoss(outputs, labels)
                    else:
                        classes = classifier(outputs)
                        loss = CELoss(classes, labels)
                    running_loss.append(loss.item())
                    if phase == 'train':
                        loss.backward()
                        opt1.step()
                        opt2.step()
                end = time.time()
                print('*' * 20)
                print('epoch: {}/{} {}_global_steps: {} processing_time: {:.4f} s LR: {:.8f}'.
                      format(epoch, config.epochs, phase, global_steps[phase],
                             end - start, opt1.param_groups[0]['lr']))
                print('{} loss: {:.6}'.format(phase, loss.item()))
                if phase == 'train' and i % 10 == 0:
                    logging.info('epoch:{} steps:{} processing_time:{:.4f}s LR:{:.8f} loss:{:.6}'.
                                 format(epoch, global_steps[phase], end - start,
                                        opt1.param_groups[0]['lr'], loss.item()))
                if phase == 'eval':
                    logging.info('eval_epoch:{} steps:{} processing_time:{:.4f}s LR:{:.8f} loss:{:.6}'.
                                 format(epoch, global_steps[phase], end - start,
                                        opt1.param_groups[0]['lr'], loss.item()))
                # set visdom
                if config.use_visdom and i % 5 == 0:
                    if phase == 'train':
                        viz.line(
                            X=torch.Tensor([1]).cpu() * (epoch + i * config.batch_size / len(image_datasets[phase])),
                            Y=torch.Tensor([loss.item()]).cpu(),
                            win=visline1,
                            update='append'
                        )
                        viz.line(
                            X=torch.Tensor([1]).cpu() * (epoch + i * config.batch_size / len(image_datasets[phase])),
                            Y=torch.Tensor([opt1.param_groups[0]['lr']]),
                            win=visline3,
                            update='append'
                        )
                    else:
                        viz.line(
                            X=torch.Tensor([1]).cpu() * (epoch + i * config.batch_size / len(image_datasets[phase])),
                            Y=torch.Tensor([loss.item()]),
                            win=visline2,
                            update='append'
                        )
                global_steps[phase] += 1
                if epoch % config.image_intervals == 0:
                    if config.segmentation_only:
                        image_saver(images=images, masks=outputs_mask,
                                    out_dir=os.path.join(config.out_dir, 'images'),
                                    phase=phase, steps=global_steps[phase],
                                    epoch=epoch)
                    else:
                        # Last classifier parameter tensor is used as the CAM
                        # weights for heat-map rendering.
                        cam_weights = list(classifier.parameters())[-1].data.cpu().numpy()
                        image_saver_cam(images=images, heatmaps=outputs,
                                        probs=cam_weights,
                                        out_dir=os.path.join(config.out_dir, 'images'),
                                        phase=phase, steps=global_steps[phase],
                                        epoch=epoch)
            current_loss = sum(running_loss) / len(running_loss)
            if phase == 'train' and epoch % config.model_intervals == 0 and current_loss < min_loss:
                torch.save({
                    'epoch': epoch,
                    'seg': model.state_dict(),
                    'cls': classifier.state_dict(),
                    'lr1': opt1.param_groups[0]['lr'],
                    'lr2': opt2.param_groups[0]['lr'],
                    'loss': current_loss},
                    os.path.join(config.out_dir, 'models', str(epoch) + '.pt')
                )
                running_loss.clear()
                min_loss = current_loss
                print('Saving model in {} for {} epoches'.format(config.out_dir + '/models', epoch))
                               workers, Ratio=Ratio, net='CNN')
# Criteria = nn.CrossEntropyLoss()

######## Initial model accuracy on test dataset #######
test_loss, test_acc = test(model, device, test_loader)

###################################################################################
##############Federated learning process##############
#Define visdom
# Running history of metrics, seeded with the pre-training evaluation above.
logs = {'train_loss': [], 'test_loss': [], 'test_acc': [], 'varepsilon': []}
logs['test_acc'].append(test_acc)
logs['test_loss'].append(test_loss)
Results_testloss = vis.line(np.array([test_loss]), [1],
                            win='Test_loss',
                            opts=dict(title='Test loss on Sent140',
                                      legend=['Test loss']))
Results_testacc = vis.line(np.array(np.array([test_acc])), [1],
                           win='Test_acc',
                           opts=dict(title='Test accuracy on Sent140',
                                     legend=['Test accuracy']))
# NOTE(review): window id 'Train_acc' actually holds the *train loss* curve —
# looks like a copy/paste slip; confirm no other code references this window
# id before renaming it.
Results_trainloss = vis.line([0.], [1],
                             win='Train_acc',
                             opts=dict(title='Train loss on Sent140',
                                       legend=['Train loss']))

# Obtain information of layers
Layers_num, Layers_shape, Layers_nodes = GetModelLayers(model)

# Set learning rate
lr = args.lr

# Generate clipping bound
# Plot the training-loss curve recorded in a "json_stats" log file to Visdom.
viz = Visdom()
iters = []
losses = []
print(args.file)
# Warm-up iterations to drop from the plot.
skip_iters = 100
with open(args.file, 'r') as f:
    for line in f:
        if line.startswith("json_stats"):
            try:
                # The JSON payload follows the "json_stats: " prefix.
                stats = json.loads(line[12:])
            # json.JSONDecodeError subclasses ValueError; like the original
            # bare except, stop at the first malformed record.
            except ValueError:
                break
            # BUG FIX: the skip threshold was hard-coded as 100 even though
            # skip_iters was defined for exactly this purpose.
            if stats["iter"] < skip_iters:
                continue
            iters.append(stats["iter"])
            losses.append(stats["loss"])
iter_arr = np.array(iters)
loss_arr = np.array(losses)
viz.line(X=iter_arr, Y=loss_arr)
            start_iter = 0
        else:
            start_iter += 1
        # FIXME(review): int() truncates a fractional running loss carried in
        # the checkpoint; float(package.get('avg_loss', 0)) looks intended.
        avg_loss = int(package.get('avg_loss', 0))
        loss_results, cer_results, wer_results = package['loss_results'], package[
            'cer_results'], package['wer_results']
        if main_proc and args.visdom and \
                package[
                    'loss_results'] is not None and start_epoch > 0:
            # Add previous scores to visdom graph
            x_axis = epochs[0:start_epoch]
            y_axis = torch.stack(
                (loss_results[0:start_epoch],
                 wer_results[0:start_epoch],
                 cer_results[0:start_epoch]),
                dim=1)
            viz_window = viz.line(
                X=x_axis,
                Y=y_axis,
                opts=opts,
            )
        if main_proc and args.tensorboard and \
                package[
                    'loss_results'] is not None and start_epoch > 0:
            # Previous scores to tensorboard logs
            for i in range(start_epoch):
                values = {
                    'Avg Train Loss': loss_results[i],
                    'Avg WER': wer_results[i],
                    'Avg CER': cer_results[i]
                }
                tensorboard_writer.add_scalars(args.id, values, i + 1)
    else:
        with open(args.labels_path) as label_file:
            labels = str(''.join(json.load(label_file)))
                    Giter))
            torch.save(
                netD.state_dict(),
                '{0}/netD_epoch_{1}_Giter_{2}.pth'.format(checkpoint_path,
                                                          epoch, Giter))

# Visdom windows for the five discriminator loss curves, each seeded with a
# single (0, 0) point so later updates can append to them.
vis = Visdom(port=8097, server=args.visdom_host)
ZERO = torch.zeros(1).cpu()
oL2LossD_real = dict(xlabel='minibatches', ylabel='loss', title='L2LossD_real')
oL2LossD_fake = dict(xlabel='minibatches', ylabel='loss', title='L2LossD_fake')
oL2LossD = dict(xlabel='minibatches', ylabel='loss', title='L2LossD')
oRegLossD = dict(xlabel='minibatches', ylabel='loss', title='RegLossD')
oTotalLoss = dict(xlabel='minibatches', ylabel='loss', title='TotalLoss')
wL2LossD_real = vis.line(X=ZERO, Y=ZERO, opts=oL2LossD_real)
wL2LossD_fake = vis.line(X=ZERO, Y=ZERO, opts=oL2LossD_fake)
wL2LossD = vis.line(X=ZERO, Y=ZERO, opts=oL2LossD)
wRegLossD = vis.line(X=ZERO, Y=ZERO, opts=oRegLossD)
wTotalLoss = vis.line(X=ZERO, Y=ZERO, opts=oTotalLoss)
# name -> visdom window handle for the image windows created lazily below.
visdom_windows = {}


def save_images(img_path, real_cpu):
    # Un-normalize from [-1, 1] back to [0, 1] and keep only the first
    # genImg_num samples.
    real_cpu = real_cpu[:genImg_num].mul(0.5).add(0.5)
    b, c, h, w = real_cpu.shape
    if c == 11:
        # 11-channel layered image: split per layer group — presumably
        # dataset.layerRanges holds (start, stop) channel ranges; TODO confirm.
        for _range, name in zip(dataset.layerRanges, dataset.layerKeys):
            _real_cpu = real_cpu[:, list(range(*_range)), :, :]
            if 'real_' + name not in visdom_windows:
def main():
    """Entry point: read the config file, build the CTC acoustic model and the
    speech data loaders (the training loop continues below)."""
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    # FIXME(review): bare except; ConfigParser.read() does not raise on a
    # missing file, so this guard likely never fires.
    except:
        print("conf file not exists")
        sys.exit(1)
    USE_CUDA = cf.getboolean('Training', 'use_cuda')
    try:
        # FIXME(review): long() is Python 2 only; int() works on both.
        seed = long(cf.get('Training', 'seed'))
    # No (or invalid) seed configured: take CUDA's initial seed and persist
    # it back to the config file.
    # NOTE(review): Python 3 configparser requires a *string* value in set(),
    # and the file handle opened here is never closed — prefer str(seed) and
    # a with-block.
    except:
        seed = torch.cuda.initial_seed()
        cf.set('Training', 'seed', seed)
        cf.write(open(args.conf, 'w'))
    torch.manual_seed(seed)
    if USE_CUDA:
        torch.cuda.manual_seed(seed)

    log_dir = cf.get('Data', 'log_dir')
    log_file = os.path.join(log_dir, cf.get('Data', 'log_file'))
    logger = init_logger(log_file)

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    rnn_param = {"rnn_input_size":rnn_input_size,
                 "rnn_hidden_size":rnn_hidden_size,
                 "rnn_layers":rnn_layers,
                 "rnn_type":rnn_type,
                 "bidirectional":bidirectional,
                 "batch_norm":batch_norm}
    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    model = CTC_Model(rnn_param=rnn_param, num_class=num_class, drop_out=drop_out)
    print("Model Structure:")
    logger.info("Model Structure:")
    for idx, m in enumerate(model.children()):
        print(idx, m)
        logger.info(str(idx) + "->" + str(m))

    data_dir = cf.get('Data', 'data_dir')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = SpeechDataset(data_dir, data_set='train')
    dev_dataset = SpeechDataset(data_dir, data_set="dev")
    train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size,
                                    shuffle=True, num_workers=4,
                                    pin_memory=False)
    dev_loader = SpeechDataLoader(dev_dataset, batch_size=batch_size,
                                  shuffle=False, num_workers=4,
                                  pin_memory=False)

    #ensure the feats is equal to the rnn_input_Size
    assert train_dataset.n_feats == rnn_input_size

    #decoder for dev set
    decoder = GreedyDecoder(int2char, space_idx=len(int2char) - 1,
                            blank_index=0)
#Training init_lr = cf.getfloat('Training', 'init_lr') num_epoches = cf.getint('Training', 'num_epoches') end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc') decay = cf.getfloat("Training", 'lr_decay') weight_decay = cf.getfloat("Training", 'weight_decay') params = { 'num_epoches':num_epoches, 'end_adjust_acc':end_adjust_acc, 'seed':seed, 'decay':decay, 'learning_rate':init_lr, 'weight_decay':weight_decay, 'batch_size':batch_size, 'n_feats':train_dataset.n_feats } print(params) if USE_CUDA: model = model.cuda() loss_fn = CTCLoss() optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, weight_decay=weight_decay) #visualization for training from visdom import Visdom viz = Visdom() title = 'TIMIT LSTM_CTC Acoustic Model' opts = [dict(title=title+" Loss", ylabel = 'Loss', xlabel = 'Epoch'), dict(title=title+" Loss on Dev", ylabel = 'DEV Loss', xlabel = 'Epoch'), dict(title=title+' CER on DEV', ylabel = 'DEV CER', xlabel = 'Epoch')] viz_window = [None, None, None] count = 0 learning_rate = init_lr loss_best = 1000 loss_best_true = 1000 adjust_rate_flag = False stop_train = False adjust_time = 0 acc_best = 0 start_time = time.time() loss_results = [] dev_loss_results = [] dev_cer_results = [] while not stop_train: if count >= num_epoches: break count += 1 if adjust_rate_flag: learning_rate *= decay adjust_rate_flag = False for param in optimizer.param_groups: param['lr'] *= decay print("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate)) logger.info("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate)) loss = train(model, train_loader, loss_fn, optimizer, logger, print_every=20, USE_CUDA=USE_CUDA) loss_results.append(loss) acc, dev_loss = dev(model, dev_loader, loss_fn, decoder, logger, USE_CUDA=USE_CUDA) print("loss on dev set is %.4f" % dev_loss) logger.info("loss on dev set is %.4f" % dev_loss) dev_loss_results.append(dev_loss) dev_cer_results.append(acc) #adjust learning rate by dev_loss #adjust_rate_count : 
        # (translated) the loss is considered stable once it stays inside the
        # end_adjust_acc band for more than `count` consecutive epochs
        if dev_loss < (loss_best - end_adjust_acc):
            # Clear improvement: reset the stability counter, snapshot best.
            loss_best = dev_loss
            loss_best_true = dev_loss
            adjust_rate_count = 0
            acc_best = acc
            best_model_state = copy.deepcopy(model.state_dict())
            best_op_state = copy.deepcopy(optimizer.state_dict())
        elif (dev_loss < loss_best + end_adjust_acc):
            # Within the plateau band: count towards a learning-rate decay.
            adjust_rate_count += 1
            if dev_loss < loss_best and dev_loss < loss_best_true:
                loss_best_true = dev_loss
                acc_best = acc
                best_model_state = copy.deepcopy(model.state_dict())
                best_op_state = copy.deepcopy(optimizer.state_dict())
        else:
            # Loss got clearly worse: force an immediate decay.
            adjust_rate_count = 10

        print("adjust_rate_count: %d" % adjust_rate_count)
        print('adjust_time: %d' % adjust_time)
        logger.info("adjust_rate_count: %d" % adjust_rate_count)
        logger.info('adjust_time: %d' % adjust_time)

        if adjust_rate_count == 10:
            # Decay the learning rate next epoch and roll back to the best
            # checkpoint seen so far.
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            if loss_best > loss_best_true:
                loss_best = loss_best_true
            model.load_state_dict(best_model_state)
            optimizer.load_state_dict(best_op_state)

        if adjust_time == 8:
            # Stop after eight learning-rate decays.
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, dev acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))
        logger.info("epoch %d done, dev acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used))

        x_axis = range(count)
        y_axis = [loss_results[0:count], dev_loss_results[0:count], dev_cer_results[0:count]]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), opts = opts[x],)
            else:
                viz.line(X = np.array(x_axis), Y = np.array(y_axis[x]), win = viz_window[x], update = 'replace',)

    print("End training, best dev loss is: %.4f, acc is: %.4f" % (loss_best_true, acc_best))
    logger.info("End training, best dev loss acc is: %.4f, acc is: %.4f" % (loss_best_true, acc_best))
    model.load_state_dict(best_model_state)
    optimizer.load_state_dict(best_op_state)
    best_path = os.path.join(log_dir, 'best_model'+'_dev'+str(acc_best)+'.pkl')
    # Persist the best model path back to the config file.
    # NOTE(review): the file handle opened here is never closed.
    cf.set('Model',
           'model_file', best_path)
    cf.write(open(args.conf, 'w'))
    params['epoch']=count

    torch.save(CTC_Model.save_package(model, optimizer=optimizer, epoch=params,
                                      loss_results=loss_results,
                                      dev_loss_results=dev_loss_results,
                                      dev_cer_results=dev_cer_results),
               best_path)
    columnnames=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'],
    rownames=['y1', 'y2', 'y3', 'y4', 'y5'],
    colormap='Electric',
))

# contour
# 2D Gaussian bump centered at (50, 50).
x = np.tile(np.arange(1, 101), (100, 1))
y = x.transpose()
X = np.exp((((x - 50)**2) + ((y - 50)**2)) / -(20.0**2))
viz.contour(X=X, opts=dict(colormap='Viridis'))

# surface
viz.surf(X=X, opts=dict(colormap='Hot'))

# line plots
viz.line(Y=np.random.rand(10), opts=dict(showlegend=True))

Y = np.linspace(-5, 5, 100)
viz.line(
    Y=np.column_stack((Y * Y, np.sqrt(Y + 5))),
    X=np.column_stack((Y, Y)),
    opts=dict(markers=False),
)

# line using WebGL
webgl_num_points = 200000
webgl_x = np.linspace(-1, 0, webgl_num_points)
webgl_y = webgl_x**3
viz.line(X=webgl_x, Y=webgl_y,
         opts=dict(title='{} points using WebGL'.format(webgl_num_points),
        rownames=['y1', 'y2', 'y3', 'y4', 'y5'],
        colormap='Electric',
    )
)

# contour
# 2D Gaussian bump centered at (50, 50).
x = np.tile(np.arange(1, 101), (100, 1))
y = x.transpose()
X = np.exp((((x - 50) ** 2) + ((y - 50) ** 2)) / -(20.0 ** 2))
viz.contour(X=X, opts=dict(colormap='Viridis'))

# surface
viz.surf(X=X, opts=dict(colormap='Hot'))

# line plots
viz.line(Y=np.random.rand(10))

Y = np.linspace(-5, 5, 100)
viz.line(
    Y=np.column_stack((Y * Y, np.sqrt(Y + 5))),
    X=np.column_stack((Y, Y)),
    opts=dict(markers=False),
)

# line updates
win = viz.line(
    X=np.column_stack((np.arange(0, 10), np.arange(0, 10))),
    Y=np.column_stack((np.linspace(5, 10, 10),
                       np.linspace(5, 10, 10) + 5)),
)
viz.line(
    X=np.column_stack((np.arange(10, 20), np.arange(10, 20))),
labels = labels.to(device) # Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i + 1) % 50 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format( epoch + 1, num_epochs, i + 1, total_step, loss.item())) vis.line(X=torch.FloatTensor([epoch]), Y=torch.FloatTensor([loss.item()]), win='total_loss', update='append' if i > 0 else None) # Test the model model.eval() # eval mode acc = np.zeros(6) with torch.no_grad(): correct = 0 total = 0 for images, labels in test_loader: images = images.to(device) labels = labels.to(device) outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item()
# times=range(0,60,3) # tt=[] # for i in ttt: # tt.append(i[:20]) # ttt=tt # textwindow = viz.text('Hello World!') win = viz.line( X=np.array(times), Y=np.array(ttt[0]), opts=dict( markersize=10, markers=1, legend=['0-shot accuracy'], ylabel='Percent Correct', xlabel='Episode', xtick=1, ytick=1, # xtype='log' ) ) viz.updateTrace( X=np.array(times), Y=np.array(ttt[1]), win=win, name='1-shot accuracy', ) viz.updateTrace( X=np.array(times), Y=np.array(ttt[2]),
def main():
    """Entry point for DeepSpeech CTC training: parse args, build the model,
    loaders and loggers, and optionally resume from a checkpoint (the
    train/eval loop continues below)."""
    args = parser.parse_args()
    save_folder = args.save_folder

    # Per-epoch metric buffers, pre-sized to args.epochs.
    loss_results, cer_results, wer_results = torch.Tensor(
        args.epochs), torch.Tensor(args.epochs), torch.Tensor(args.epochs)
    best_wer = None
    if args.visdom:
        from visdom import Visdom
        viz = Visdom()
        opts = [
            dict(title=args.visdom_id + ' Loss', ylabel='Loss', xlabel='Epoch'),
            dict(title=args.visdom_id + ' WER', ylabel='WER', xlabel='Epoch'),
            dict(title=args.visdom_id + ' CER', ylabel='CER', xlabel='Epoch')
        ]
        viz_windows = [None, None, None]
        epochs = torch.arange(1, args.epochs + 1)
    if args.tensorboard:
        from logger import TensorBoardLogger
        try:
            os.makedirs(args.log_dir)
        except OSError as e:
            if e.errno == errno.EEXIST:
                # Wipe stale event files from a previous run.
                print('Directory already exists.')
                for file in os.listdir(args.log_dir):
                    file_path = os.path.join(args.log_dir, file)
                    try:
                        if os.path.isfile(file_path):
                            os.unlink(file_path)
                    # NOTE(review): 'as e' shadows the outer OSError; the
                    # handler just re-raises anyway.
                    except Exception as e:
                        raise
            else:
                raise
        logger = TensorBoardLogger(args.log_dir)

    try:
        os.makedirs(save_folder)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise
    criterion = CTCLoss()

    with open(args.labels_path) as label_file:
        labels = str(''.join(json.load(label_file)))

    audio_conf = dict(sample_rate=args.sample_rate,
                      window_size=args.window_size,
                      window_stride=args.window_stride,
                      window=args.window,
                      noise_dir=args.noise_dir,
                      noise_prob=args.noise_prob,
                      noise_levels=(args.noise_min, args.noise_max))

    train_dataset = SpectrogramDataset(audio_conf=audio_conf,
                                       manifest_filepath=args.train_manifest,
                                       labels=labels,
                                       normalize=True,
                                       augment=args.augment)
    test_dataset = SpectrogramDataset(audio_conf=audio_conf,
                                      manifest_filepath=args.val_manifest,
                                      labels=labels,
                                      normalize=True,
                                      augment=False)
    train_loader = AudioDataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    test_loader = AudioDataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=args.num_workers)

    rnn_type = args.rnn_type.lower()
    assert rnn_type in supported_rnns, "rnn_type should be either lstm, rnn or gru"
    model = DeepSpeech(rnn_hidden_size=args.hidden_size,
                       nb_layers=args.hidden_layers,
                       labels=labels,
                       rnn_type=supported_rnns[rnn_type],
                       audio_conf=audio_conf,
                       bidirectional=True)
    parameters = model.parameters()
    optimizer = torch.optim.SGD(parameters, lr=args.lr,
                                momentum=args.momentum, nesterov=True)
    decoder = GreedyDecoder(labels)

    if args.continue_from:
        print("Loading checkpoint model %s" % args.continue_from)
        package = torch.load(args.continue_from)
        model.load_state_dict(package['state_dict'])
        optimizer.load_state_dict(package['optim_dict'])
        start_epoch = int(package.get(
            'epoch', 1)) - 1  # Python index start at 0 for training
        start_iter = package.get('iteration', None)
        if start_iter is None:
            start_epoch += 1  # Assume that we saved a model after an epoch finished, so start at the next epoch.
            start_iter = 0
        else:
            start_iter += 1
        # FIXME(review): int() truncates a fractional running loss carried in
        # the checkpoint; float() looks intended.
        avg_loss = int(package.get('avg_loss', 0))
        loss_results, cer_results, wer_results = package[
            'loss_results'], package['cer_results'], package['wer_results']
        if args.visdom and \
                package['loss_results'] is not None and start_epoch > 0:
            # Add previous scores to visdom graph
            x_axis = epochs[0:start_epoch]
            y_axis = [
                loss_results[0:start_epoch],
                wer_results[0:start_epoch],
                cer_results[0:start_epoch]
            ]
            for x in range(len(viz_windows)):
                viz_windows[x] = viz.line(
                    X=x_axis,
                    Y=y_axis[x],
                    opts=opts[x],
                )
        if args.tensorboard and \
                package['loss_results'] is not None and start_epoch > 0:
            # Previous scores to tensorboard logs
            for i in range(start_epoch):
                info = {
                    'Avg Train Loss': loss_results[i],
                    'Avg WER': wer_results[i],
                    'Avg CER': cer_results[i]
                }
                for tag, val in info.items():
                    logger.scalar_summary(tag, val, i + 1)
        # FIXME(review): 'epoch' is not defined yet at this point, so this
        # branch raises NameError; 'start_epoch != 0' looks intended.
        if not args.no_bucketing and epoch != 0:
            print("Using bucketing sampler for the following epochs")
            train_dataset = SpectrogramDatasetWithLength(
                audio_conf=audio_conf,
                manifest_filepath=args.train_manifest,
                labels=labels,
                normalize=True,
                augment=args.augment)
            sampler = BucketingSampler(train_dataset)
            train_loader.sampler = sampler
    else:
        # Fresh run: no checkpoint to resume from.
        avg_loss = 0
        start_epoch = 0
        start_iter = 0
    if args.cuda:
        model = torch.nn.DataParallel(model).cuda()
    print(model)
    print("Number of parameters: %d" % DeepSpeech.get_param_size(model))

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    for epoch in range(start_epoch, args.epochs):
        model.train()
        end = time.time()
        for i, (data) in enumerate(train_loader, start=start_iter):
            if i == len(train_loader):
                break
            inputs, targets, input_percentages, target_sizes = data
            # measure data loading time
            data_time.update(time.time() - end)
            inputs = Variable(inputs, requires_grad=False)
            target_sizes = Variable(target_sizes, requires_grad=False)
            targets = Variable(targets, requires_grad=False)

            if args.cuda:
                inputs = inputs.cuda()

            out = model(inputs)
            out = out.transpose(0, 1)  # TxNxH

            # Convert the per-sample length percentages into frame counts.
            seq_length = out.size(0)
            sizes = Variable(input_percentages.mul_(int(seq_length)).int(),
                             requires_grad=False)

            loss = criterion(out, targets, sizes, target_sizes)
            loss = loss / inputs.size(0)  # average the loss by minibatch

            # Guard against an infinite CTC loss poisoning the average.
            loss_sum = loss.data.sum()
            inf = float("inf")
            if loss_sum == inf or loss_sum == -inf:
                print("WARNING: received an inf loss, setting loss value to 0")
                loss_value = 0
            else:
                loss_value = loss.data[0]

            avg_loss += loss_value
            losses.update(loss_value, inputs.size(0))

            # compute gradient
            optimizer.zero_grad()
            loss.backward()

            torch.nn.utils.clip_grad_norm(model.parameters(), args.max_norm)
            # SGD step
            optimizer.step()

            if args.cuda:
                torch.cuda.synchronize()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if not args.silent:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                          (epoch + 1), (i + 1), len(train_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses))
            if args.checkpoint_per_batch > 0 and i > 0 and (
                    i + 1) % args.checkpoint_per_batch == 0:
                file_path = '%s/deepspeech_checkpoint_epoch_%d_iter_%d.pth.tar' % (
                    save_folder, epoch + 1, i + 1)
                print("Saving checkpoint model to %s" % file_path)
                torch.save(
                    DeepSpeech.serialize(model,
                                         optimizer=optimizer,
                                         epoch=epoch,
                                         iteration=i,
                                         loss_results=loss_results,
                                         wer_results=wer_results,
                                         cer_results=cer_results,
                                         avg_loss=avg_loss), file_path)
            del loss
            del out
        avg_loss /= len(train_loader)
        print('Training Summary Epoch: [{0}]\t'
              'Average Loss {loss:.3f}\t'.format(epoch + 1, loss=avg_loss))

        start_iter = 0  # Reset start iteration for next epoch
        total_cer, total_wer = 0, 0
        model.eval()
        for i, (data) in enumerate(test_loader):  # test
            inputs, targets, input_percentages, target_sizes = data

            inputs = Variable(inputs, volatile=True)

            # unflatten targets
            split_targets = []
            offset = 0
            for size in target_sizes:
                split_targets.append(targets[offset:offset + size])
                offset += size

            if args.cuda:
                inputs = inputs.cuda()

            out = model(inputs)
            out = out.transpose(0, 1)  # TxNxH
            seq_length = out.size(0)
            sizes = input_percentages.mul_(int(seq_length)).int()

            decoded_output = decoder.decode(out.data, sizes)
            target_strings = decoder.process_strings(
                decoder.convert_to_strings(split_targets))
            # WER/CER are normalized per utterance, then averaged over the
            # dataset below.
            wer, cer = 0, 0
            for x in range(len(target_strings)):
                wer += decoder.wer(decoded_output[x], target_strings[x]) / float(
                    len(target_strings[x].split()))
                cer += decoder.cer(decoded_output[x], target_strings[x]) / float(
                    len(target_strings[x]))
            total_cer += cer
            total_wer += wer

            if args.cuda:
                torch.cuda.synchronize()
            del out
        wer = total_wer / len(test_loader.dataset)
        cer = total_cer / len(test_loader.dataset)
        wer *= 100
        cer *= 100
        loss_results[epoch] = avg_loss
        wer_results[epoch] = wer
        cer_results[epoch] = cer
        print('Validation Summary Epoch: [{0}]\t'
              'Average WER {wer:.3f}\t'
              'Average CER {cer:.3f}\t'.format(epoch + 1, wer=wer, cer=cer))

        if args.visdom:
            # epoch += 1
            x_axis = epochs[0:epoch + 1]
            y_axis = [
                loss_results[0:epoch + 1],
                wer_results[0:epoch + 1],
                cer_results[0:epoch + 1]
            ]
            for x in range(len(viz_windows)):
                if viz_windows[x] is None:
                    viz_windows[x] = viz.line(
                        X=x_axis,
                        Y=y_axis[x],
                        opts=opts[x],
                    )
                else:
                    viz.line(
                        X=x_axis,
                        Y=y_axis[x],
                        win=viz_windows[x],
                        update='replace',
                    )
        if args.tensorboard:
            info = {'Avg Train Loss': avg_loss, 'Avg WER': wer, 'Avg CER': cer}
            for tag, val in info.items():
                logger.scalar_summary(tag, val, epoch + 1)
            if args.log_params:
                for tag, value in model.named_parameters():
                    tag = tag.replace('.', '/')
                    logger.histo_summary(tag, to_np(value), epoch + 1)
                    logger.histo_summary(tag + '/grad', to_np(value.grad),
                                         epoch + 1)
        if args.checkpoint:
            file_path = '%s/deepspeech_%d.pth.tar' % (save_folder, epoch + 1)
            torch.save(
                DeepSpeech.serialize(model,
                                     optimizer=optimizer,
                                     epoch=epoch,
                                     loss_results=loss_results,
                                     wer_results=wer_results,
                                     cer_results=cer_results), file_path)
        # anneal lr
        optim_state = optimizer.state_dict()
        optim_state['param_groups'][0][
            'lr'] = optim_state['param_groups'][0]['lr'] / args.learning_anneal
        optimizer.load_state_dict(optim_state)
        print('Learning rate annealed to: {lr:.6f}'.format(
            lr=optim_state['param_groups'][0]['lr']))

        if best_wer is None or best_wer > wer:
            print("Found better validated model, saving to %s" % args.model_path)
            torch.save(
                DeepSpeech.serialize(model,
                                     optimizer=optimizer,
                                     epoch=epoch,
                                     loss_results=loss_results,
                                     wer_results=wer_results,
                                     cer_results=cer_results), args.model_path)
            best_wer = wer

        avg_loss = 0
        if not args.no_bucketing and epoch == 0:
            # After the first epoch, switch to length-bucketed batching.
            print("Switching to bucketing sampler for following epochs")
            train_dataset = SpectrogramDatasetWithLength(
                audio_conf=audio_conf,
                manifest_filepath=args.train_manifest,
                labels=labels,
                normalize=True,
                augment=args.augment)
            sampler = BucketingSampler(train_dataset)
            train_loader.sampler = sampler
class Visdom_Plot(object):
    """Thin wrapper around a Visdom connection that lazily creates and caches
    named windows for loss/accuracy curves, text and image panels."""

    def __init__(self, port=8097, env_name='main'):
        # (translated) when 'env_name' is not 'main', a new environment is
        # created on the visdom server.
        self.viz = Visdom(port=port, env=env_name)
        # window-name -> visdom window handle, one dict per window kind
        self.loss_win = {}
        self.acc_win = {}
        self.text_win = {}
        self.plt_img_win = {}
        self.images_win = {}
        self.gray_win = {}

    # NOTE(review): parameters 'type' and 'id' shadow Python builtins; kept
    # for interface compatibility.
    def _new_win(self, type='loss_win', win_name='default_loss_win',
                 id='train_loss', H_img=100):
        '''
        type: loss_win, acc_win, text_win, plt_img_win
        name: default is the default win in class. you can specify a window's name
        id: the line's name
        '''
        assert type in [
            'loss_win', 'acc_win', 'text_win', 'plt_img_win', 'gray_win'
        ], "win type must a string inside ['loss_win', 'acc_win', 'text_win', 'plt_img_win'] "
        if type == 'loss_win':
            self.loss_win[win_name] = self.viz.line(X=np.array([0]),
                                                    Y=np.array([0]),
                                                    name=id,
                                                    opts=dict(
                                                        xlabel='Epoch.batch',
                                                        ylabel='Loss',
                                                        title=win_name,
                                                        marginleft=60,
                                                        marginbottom=60,
                                                        margintop=80,
                                                        width=800,
                                                        height=600,
                                                    ))
        elif type == 'acc_win':
            self.acc_win[win_name] = self.viz.line(X=np.array([0]),
                                                   Y=np.array([0]),
                                                   name=id,
                                                   opts=dict(
                                                       xlabel='Epoch.batch',
                                                       ylabel='Top1 accuracy',
                                                       title=win_name,
                                                       showlegend=True,
                                                       markercolor=np.array(
                                                           [[255, 0, 0]]),
                                                       marginleft=60,
                                                       marginbottom=60,
                                                       margintop=60,
                                                       width=800,
                                                       height=600,
                                                   ))
        elif type == 'plt_img_win' or type == 'gray_win':
            # Seed the image window with random noise; real content is pushed
            # later via viz.matplot()/viz.images().
            getattr(self, type)[win_name] = self.viz.images(np.random.randn(
                1, 3, 100, 100),
                opts=dict(
                    height=H_img * 5,
                    width=H_img * 5,
                ))
        elif type == 'text_win':
            self.text_win[win_name] = self.viz.text('Text Window')

    def append_loss(self, loss, epoch_batches, win_name='default_loss_win',
                    id='train_loss'):
        # Append one loss point, creating the window on first use.
        if win_name not in self.loss_win:
            self._new_win(type='loss_win', win_name=win_name, id=id)
        self.viz.line(X=np.array([epoch_batches]),
                      Y=np.array([loss]),
                      win=self.loss_win[win_name],
                      name=id,
                      opts=dict(showlegend=True),
                      update='append')

    def append_acc(self, train_acc, epoch_batches, win_name='default_acc_win',
                   id='train_acc'):
        # Append one accuracy point, creating the window on first use.
        if win_name not in self.acc_win:
            self._new_win(type='acc_win', win_name=win_name, id=id)
        self.viz.line(X=np.array([epoch_batches]),
                      Y=np.array([train_acc]),
                      win=self.acc_win[win_name],
                      name=id,
                      opts=dict(showlegend=True),
                      update='append')

    def lr_scatter(self, epoch, lr, win_name='default_acc_win'):
        # Mark a learning-rate change as a scatter point on the accuracy
        # window (y is fixed at 20 so the marker is visible on that axis).
        self.viz.scatter(
            X=np.array([[epoch, 20]]),
            name='lr=' + str(lr),
            win=self.acc_win[win_name],
            opts=dict(showlegend=True),
            update='append',
        )

    def img_plot(self, images, lm=None, mode='update', caption=''):
        '''
        Input:
            images : tensors, N x 3 x H x W, so transfer to N x H x W x 3 is needed
            lm : N x K x 2, is not None, then landmarks will be scattered.
        '''
        win_exist = len(self.plt_img_win)
        N, C, H, W = images.size()
        if N > win_exist:
            # Lazily create one matplotlib-backed window per sample.
            for i in range(win_exist, N, 1):
                self._new_win(type='plt_img_win',
                              win_name='image' + str(i),
                              H_img=H)
        if lm is not None:
            N, K, m = lm.size()
            assert N == images.size(
            )[0] and m == 2, "landmarks have illegal size"
            lm = lm.cpu()
        images = images.cpu()
        plt.figure(figsize=(H * 0.06, W * 0.06))
        for n, image in enumerate(images[:]):
            # print(image.size())
            # CHW -> HWC for matplotlib.
            image = image.transpose(0, 1).transpose(1, 2)
            # print(image.size())
            plt.imshow(image.detach().numpy(
            ))  # convert to H x W x 3.
            # (translated) plt takes H x W x 3 input, while viz.images()
            # takes 3 x H x W
            if lm is not None:
                # Scatter the K landmarks with a per-point colour ramp.
                color = np.linspace(0, 1, num=K)
                plt.scatter(x=lm[n, :, 0].detach().numpy(),
                            y=lm[n, :, 1].detach().numpy(),
                            c=color,
                            marker='x',
                            s=200)
            self.viz.matplot(plt,
                             win=self.plt_img_win['image' + str(n)],
                             opts=dict(caption='image' + str(n)))
            plt.clf()

    def images(self, images, win_name='default_images_win'):
        '''
        Input:
            images:N x 3 x H x W, tensors
        '''
        images = images.cpu()
        if win_name not in self.images_win:
            self.images_win[win_name] = self.viz.images(
                images.detach().numpy())
        else:
            self.viz.images(images.detach().numpy(),
                            win=self.images_win[win_name])

    def gray_images(self, images, win_name='default_gray_win'):
        '''
        Input:
            images : K x H x W, tensors
        '''
        images = images.cpu()
        win_exist = len(self.gray_win)
        K, H, W = images.size()
        if K > win_exist:
            # Lazily create one window per gray channel.
            for i in range(win_exist, K, 1):
                self._new_win(type='gray_win',
                              win_name='gray' + str(i),
                              H_img=H // 2)
        plt.figure(figsize=(H / 2 * 0.06, W / 2 * 0.06))
        for n, image in enumerate(images):
            plt.imshow(image.detach().numpy(
            ))  # (translated) plt takes HxWx3 input, while viz.images() takes 3xHxW
            self.viz.matplot(plt, win=self.gray_win['gray' + str(n)])
            plt.clf()

    def append_text(self, text, win_name='default_text_win', append=True):
        # Append (or overwrite) text in a named text window, creating it on
        # first use.
        if win_name not in self.text_win:
            self._new_win(type='text_win', win_name=win_name)
        self.viz.text(text, win=self.text_win[win_name], append=append)
def train():
    """Train the Encoder tagger for up to 100 epochs, plotting to Visdom.

    Every 100 steps the running train loss/accuracy and the dev-set
    loss/accuracy are computed; the model is checkpointed to
    'save/model.pt' whenever dev accuracy improves on the best seen so far.
    Resumes from an existing checkpoint when one is present.
    """
    viz = Visdom()
    line = viz.line(np.arange(2))  # placeholder window, redrawn in-place below
    mycropus = cropus()
    # Resume from a previous checkpoint when one exists.
    if os.path.isfile('save/model.pt'):
        model = torch.load('save/model.pt')
    else:
        model = Encoder(n_src_vocab=len(mycropus.token2idx),
                        n_max_seq=mycropus.max_len).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.Adam(model.parameters(), lr=0.0005)

    # Histories for the four curves shown in the Visdom window.
    train_loss_p = []
    train_acc_p = []
    dev_loss_p = []
    dev_acc_p = []
    step_p = []

    # BUGFIX: the original re-initialized best_dev_acc inside the epoch
    # loop, so every epoch "forgot" the best accuracy and could overwrite
    # a better checkpoint with a worse model. Track it across all epochs.
    best_dev_acc = 0.3

    for epoch in range(100):
        step = 0
        tr_loss_list = []  # running loss over the last <=100 steps
        tr_acc_list = []   # running accuracy over the last <=100 steps
        for batch_data in mycropus.batch_iterator(mycropus.train_data):
            # Each batch item is (token_ids, positions, tag).
            inp = [x[0] for x in batch_data]
            pos = [x[1] for x in batch_data]
            tag = [x[2] for x in batch_data]
            inp = Variable(torch.from_numpy(np.array(inp)).cuda())
            pos = Variable(torch.from_numpy(np.array(pos)).cuda())
            tag = Variable(torch.LongTensor(torch.from_numpy(
                np.array(tag)))).cuda()

            preds = model(inp, pos)
            loss = criterion(preds, tag)
            _, pred_idx = torch.max(preds, 1)
            tr_acc_list.append(
                sum(pred_idx.cpu().data.numpy() == tag.cpu().data.numpy())
                * 1. / tag.size(0))
            tr_loss_list.append(loss.cpu().data.numpy())

            optimizer.zero_grad()
            loss.backward()
            # Clip gradients (L2 norm <= 1) to stabilize training.
            nn.utils.clip_grad_norm(model.parameters(), 1, norm_type=2)
            optimizer.step()

            step += 1
            if step % 100 == 0:
                print("epoch:{},step:{},mean_loss:{},mean_acc:{}".format(
                    epoch, step, np.mean(tr_loss_list),
                    np.mean(tr_acc_list)))
                dev_acc, dev_loss = get_dev_loss(model, mycropus,
                                                 mycropus.dev_data,
                                                 criterion)
                # Checkpoint only when dev accuracy improves on the best.
                if best_dev_acc < dev_acc:
                    torch.save(model, 'save/model.pt')
                    best_dev_acc = dev_acc
                print("-----------")
                train_loss_p.append(np.mean(tr_loss_list))
                train_acc_p.append(np.mean(tr_acc_list))
                dev_loss_p.append(np.mean(dev_loss))
                dev_acc_p.append(np.mean(dev_acc))
                step_p.append(step + epoch * mycropus.nums_batch)
                # Redraw all four curves in the single Visdom window.
                steps = np.array(step_p)
                viz.line(
                    X=np.column_stack((steps, steps, steps, steps)),
                    Y=np.column_stack((np.array(train_loss_p),
                                       np.array(train_acc_p),
                                       np.array(dev_loss_p),
                                       np.array(dev_acc_p))),
                    win=line,
                    opts=dict(legend=[
                        "Train_mean_loss", "Train_acc",
                        "Eval_mean_loss", "Eval_acc"
                    ]))
                # Reset running stats for the next 100-step window.
                tr_loss_list = []
                tr_acc_list = []
class Callback(object):
    """Log or visualize evaluation metrics at the end of each training epoch.

    Depending on each metric's ``logger`` attribute, values are either
    plotted to Visdom (line plots for scalar metrics, heatmaps for
    vector-valued diff/convergence diagonals) or written to the gensim
    LDA logger. Values are stored at the end of each epoch.
    """

    def __init__(self, metrics):
        """
        Args:
            metrics : a list of callbacks. Possible values:
                "CoherenceMetric"
                "PerplexityMetric"
                "DiffMetric"
                "ConvergenceMetric"
        """
        # list of metrics to be plot
        self.metrics = metrics

    def set_model(self, model):
        """Save the model instance and initialize any required variables
        which would be updated throughout training."""
        self.model = model
        self.previous = None
        # Metrics that compare successive epochs (diff/convergence) need
        # the model state from the previous epoch.
        if any(isinstance(metric, (DiffMetric, ConvergenceMetric))
               for metric in self.metrics):
            self.previous = copy.deepcopy(model)
            # store diff diagonals of previous epochs
            self.diff_mat = Queue()
        if any(metric.logger == "visdom" for metric in self.metrics):
            if not VISDOM_INSTALLED:
                raise ImportError("Please install Visdom for visualization")
            self.viz = Visdom()
            # store initial plot windows of every metric (same window will
            # be updated with increasing epochs)
            self.windows = []
        if any(metric.logger == "shell" for metric in self.metrics):
            # set logger for current topic model
            self.log_type = logging.getLogger('gensim.models.ldamodel')

    def on_epoch_end(self, epoch, topics=None):
        """Log or visualize the current epoch's metric values.

        Args:
            epoch : current epoch no. (epoch 0 creates the plot windows,
                later epochs update them in place)
            topics : topic distribution from current epoch (required for
                coherence of unsupported topic models)

        Returns:
            dict mapping str(metric) -> value for the current epoch.
        """
        # stores current epoch's metric values
        current_metrics = {}
        # plot all metrics in current epoch
        for i, metric in enumerate(self.metrics):
            label = str(metric)
            value = metric.get_value(topics=topics, model=self.model,
                                     other_model=self.previous)
            current_metrics[label] = value
            if metric.logger == "visdom":
                if epoch == 0:
                    if value.ndim > 0:
                        # Vector-valued metric: start a heatmap whose
                        # columns are epochs.
                        diff_mat = np.array([value])
                        viz_metric = self.viz.heatmap(
                            X=diff_mat.T, env=metric.viz_env,
                            opts=dict(xlabel='Epochs', ylabel=label,
                                      title=label)
                        )
                        # store current epoch's diff diagonal
                        self.diff_mat.put(diff_mat)
                        # saving initial plot window
                        self.windows.append(copy.deepcopy(viz_metric))
                    else:
                        viz_metric = self.viz.line(
                            Y=np.array([value]), X=np.array([epoch]),
                            env=metric.viz_env,
                            opts=dict(xlabel='Epochs', ylabel=label,
                                      title=label)
                        )
                        # saving initial plot window
                        self.windows.append(copy.deepcopy(viz_metric))
                else:
                    # NOTE(review): self.windows is indexed by the metric's
                    # position i, but windows are appended only for visdom
                    # metrics at epoch 0 -- this assumes every metric logs
                    # to visdom (or visdom metrics come first); verify.
                    if value.ndim > 0:
                        # concatenate with previous epoch's diff diagonals
                        diff_mat = np.concatenate(
                            (self.diff_mat.get(), np.array([value])))
                        self.viz.heatmap(
                            X=diff_mat.T, env=metric.viz_env,
                            win=self.windows[i],
                            opts=dict(xlabel='Epochs', ylabel=label,
                                      title=label)
                        )
                        self.diff_mat.put(diff_mat)
                    else:
                        self.viz.updateTrace(
                            Y=np.array([value]), X=np.array([epoch]),
                            env=metric.viz_env, win=self.windows[i]
                        )
            if metric.logger == "shell":
                statement = "".join(("Epoch ", str(epoch), ": ", label,
                                     " estimate: ", str(value)))
                self.log_type.info(statement)
            # check for any metric which need model state from previous epoch
            if isinstance(metric, (DiffMetric, ConvergenceMetric)):
                self.previous = copy.deepcopy(self.model)
        return current_metrics
# Tail of a logging call that starts above this chunk: record the run
# configuration (user count UN, selection prob SP, batch size BZ, lr,
# noise parameter z as SNR, clipping bound CB) with a timestamp.
    '\n {} Results \n (UN is {}, SP is {}, BZ is {}, LR is {}, SNR is {}, CB is {})'
    .format(date, args.users_total, args.user_sel_prob, args.batch_size,
            args.lr, args.z, args.grad_upper_bound))
# Append the same configuration line to the results file.
with open('../results/SHAKESPEARE/FixDP_Asyn_08flat_Budget.txt', 'a+') as fl:
    fl.write(
        '\n {} Results \n (UN is {}, SP is {}, BZ is {}, LR is {}, SNR is {}, CB is {})'
        .format(date, args.users_total, args.user_sel_prob, args.batch_size,
                args.lr, args.z, args.grad_upper_bound))
###################################################################################
# Logging dictionary
logs = {'train_loss': [], 'test_loss': [], 'test_acc': [], 'varepsilon': []}
# Visdom: create the four result windows, seeded with initial values.
Results_testloss = vis.line(np.array([test_loss.numpy()]), [1],
                            win='Test_loss',
                            opts=dict(title='Test loss on Shakes',
                                      legend=['Test loss']))
Results_testacc = vis.line(np.array([test_acc.numpy()]), [1],
                           win='Test_acc',
                           opts=dict(title='Test accuracy on Shakes',
                                     legend=['Test accuracy']))
# NOTE(review): win='Train_acc' for the train-loss window looks like a
# copy-paste slip (expected 'Train_loss') -- confirm before renaming,
# since other code may reference this window id.
Results_trainloss = vis.line([0.], [1], win='Train_acc',
                             opts=dict(title='Train loss on Shakes',
                                       legend=['Train loss']))
# NOTE(review): 'Pirvacy' typo and legend=['Test accuracy'] appear to be
# copy-paste slips in user-facing strings -- left unchanged here.
Results_varepsilon = vis.line([0.], [1], win='varepsilon',
                              opts=dict(title='Pirvacy budget on Shakes',
                                        legend=['Test accuracy']))
###################################################################################