def main(args):
    """Train a QANet model end to end.

    Builds vocab/char embeddings and data loaders from preprocessed files,
    constructs the model, optimizer, warm-up scheduler and EMA, then hands
    everything to ``Trainer`` and runs the training loop.

    Args:
        args: parsed command-line namespace; expected attributes include file
            paths (``word_emb_file``, ``train_examples_file``, ...), model
            hyper-parameters (``d_model``, ``num_head``, ...) and trainer
            switches (``use_ema``, ``use_scheduler``, ...).
    """
    # show configuration
    print(args)

    # Optional reproducibility hook: flip random_seed to an int to seed all
    # three RNG sources at once. Left disabled (None) by default.
    random_seed = None
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)
        torch.manual_seed(random_seed)

    # set log file (falls back to stdout when no path is given)
    log = sys.stdout
    if args.log_file is not None:
        log = open(args.log_file, "a")

    # set device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    if torch.cuda.is_available():
        print("device is cuda, # cuda is: ", n_gpu)
    else:
        print("device is cpu")

    # process word vectors and datasets (one-off preprocessing step)
    if not args.processed_data:
        prepro(args)

    # load word vectors and datasets
    wv_tensor = torch.FloatTensor(
        np.array(pickle_load_large_file(args.word_emb_file), dtype=np.float32))
    cv_tensor = torch.FloatTensor(
        np.array(pickle_load_large_file(args.char_emb_file), dtype=np.float32))
    wv_word2ix = pickle_load_large_file(args.word_dictionary)
    train_dataloader = get_loader(
        args.train_examples_file, args.batch_size, shuffle=True)
    dev_dataloader = get_loader(
        args.dev_examples_file, args.batch_size, shuffle=True)

    # construct model; char embeddings are trained only when no pretrained
    # char vectors were supplied
    model = QANet(wv_tensor, cv_tensor,
                  args.para_limit, args.ques_limit, args.d_model,
                  num_head=args.num_head,
                  train_cemb=(not args.pretrained_char),
                  pad=wv_word2ix["<PAD>"])
    model.summary()
    if torch.cuda.device_count() > 1 and args.multi_gpu:
        model = nn.DataParallel(model)
    model.to(device)

    # exponential moving average of trainable parameters (QANet paper trick)
    ema = EMA(args.decay)
    if args.use_ema:
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param.data)

    # set optimizer and scheduler (log-warm-up to args.lr over
    # lr_warm_up_num steps, then constant)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(
        params=parameters,
        lr=args.lr,
        betas=(args.beta1, args.beta2),
        eps=1e-8,
        weight_decay=3e-7)
    cr = 1.0 / math.log(args.lr_warm_up_num)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda ee: cr * math.log(ee + 1)
        if ee < args.lr_warm_up_num else 1)

    # set loss, metrics
    loss = torch.nn.CrossEntropyLoss()

    # set visdom visualizer to store training process information
    # see the training process on http://localhost:8097/
    vis = None
    if args.visualizer:
        # BUG FIX: the original `os.system("python -m visdom.server")` blocks
        # until the spawned command exits — visdom's server never exits, so
        # training hung here forever. Launch it as a detached child instead.
        import subprocess
        subprocess.Popen([sys.executable, "-m", "visdom.server"])
        vis = Visualizer("main")

    # construct trainer
    # an identifier (prefix) for saved model
    identifier = type(model).__name__ + '_'
    trainer = Trainer(
        args, model, loss,
        train_data_loader=train_dataloader,
        dev_data_loader=dev_dataloader,
        train_eval_file=args.train_eval_file,
        dev_eval_file=args.dev_eval_file,
        optimizer=optimizer,
        scheduler=scheduler,
        epochs=args.epochs,
        with_cuda=args.with_cuda,
        save_dir=args.save_dir,
        verbosity=args.verbosity,
        save_freq=args.save_freq,
        print_freq=args.print_freq,
        resume=args.resume,
        identifier=identifier,
        debug=args.debug,
        debug_batchnum=args.debug_batchnum,
        lr=args.lr,
        lr_warm_up_num=args.lr_warm_up_num,
        grad_clip=args.grad_clip,
        decay=args.decay,
        visualizer=vis,
        logger=log,
        use_scheduler=args.use_scheduler,
        use_grad_clip=args.use_grad_clip,
        use_ema=args.use_ema,
        ema=ema,
        use_early_stop=args.use_early_stop,
        early_stop=args.early_stop)

    # start training!
    start = datetime.now()
    trainer.train()
    print("Time of training model ", datetime.now() - start)
"""Module-level setup: visdom visualizer, torchnet metric meters, CUDA device
selection, and an image un-normalization helper for visualization."""
# BUG FIX: `os` is used below (os.environ) but was never imported in this
# import group; re-importing is harmless if a wider file context already has it.
import os

from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.autograd import Variable as V
from torchnet import meter
import cv2
import numpy as np

from config.config import cfg
from util.visualize import Visualizer
from lib.core.visImage import tensor_to_np

# create visualized env (visdom, served on port 8097)
vis = Visualizer("reidatt", port=8097)

# metric meters (average precision, mean AP, running loss average)
AP = meter.APMeter()
mAP = meter.mAPMeter()
Loss_meter = meter.AverageValueMeter()

# set cuda env — restrict the process to GPU 1
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


def inverse_normalize(img):
    """Approximately undo ImageNet-style normalization for visualization.

    Maps a normalized image back to the [0, 255] range using a single mean
    (0.45) and std (0.225) for all channels, clipping before rescaling.

    Args:
        img: normalized image array/tensor (any object supporting `*`, `+`
            and `.clip(min=..., max=...)`, e.g. a numpy array).

    Returns:
        The de-normalized image scaled to [0, 255].
    """
    # Caffe-pretrained variant kept for reference (BGR mean-subtraction):
    # if opt.caffe_pretrain:
    #     img = img + (np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1))
    #     return img[::-1, :, :]
    # approximate un-normalize for visualize
    return (img * 0.225 + 0.45).clip(min=0, max=1) * 255
"""Module-level setup: visdom visualizer, torchnet metric meters and CUDA
device selection for a training script."""
# BUG FIX: `os` is used below (os.environ) but was never imported in this
# import group; re-importing is harmless if a wider file context already has it.
import os

from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
import torch as t
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.autograd import Variable as V
from torchnet import meter

from util.config import cfg
from util.visualize import Visualizer
from lib.core.visImage import imshow

# create visualized env (visdom env named after the configured system name)
vis = Visualizer(cfg.SYSTEM.NAME, port=8097)

# metric meters (average precision, mean AP, running loss average)
AP = meter.APMeter()
mAP = meter.mAPMeter()
Loss_meter = meter.AverageValueMeter()

# set cuda env — restrict the process to GPU 0
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def train_epoch(model, training_data, optimizer):
    '''Epoch operation in training phase.

    Resets the module-level meters, then iterates over the training batches.
    NOTE(review): this definition continues beyond the visible chunk; only
    the visible prefix is reproduced here.
    '''
    AP.reset()
    mAP.reset()
    Loss_meter.reset()
    for batch_idx, (data, target) in enumerate(training_data):
        data = data.cuda()
import torch.nn.functional as F import torch.optim as optim from torch.utils.data import DataLoader from torch.autograd import Variable as V from torchnet import meter from config.config import cfg from util.visualize import Visualizer from util.show_masked_image import show_masked_image from mmcv.runner import save_checkpoint, load_checkpoint import cv2 from util.show_masked_image import tensor_to_np import numpy as np #cfg.merge_from_file("config/un_att_pascal_0001.yaml") cfg.freeze() # 冻结参数 vis = Visualizer("newvis", port=8097) AP = meter.APMeter() mAP = meter.mAPMeter() top3 = meter.ClassErrorMeter(topk=[1, 3, 5], accuracy=True) Loss_meter = meter.AverageValueMeter() os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2" num = 30 def visualize_func(result): pass def inverse_normalize(img): #if opt.caffe_pretrain:
def main(args):
    """Train and evaluate a detection model under Paddle dygraph mode.

    Builds the model and datasets, runs the epoch loop (train + evaluate),
    checkpoints to ``args.output_dir`` and appends per-epoch JSON stats to
    ``log.txt``.

    Args:
        args: parsed command-line namespace (lr, batch_size, epochs, resume
            path, output_dir, dataset_file, clip_max_norm, ...).
    """
    # show configuration
    print(args)
    # everything below must run inside the dygraph (imperative) guard
    with dg.guard():
        model, criterion, postprocessors = build_model(args)
        print("building dataset")
        dataset_train = build_dataset(image_set="train", args=args)
        dataset_val = build_dataset(image_set="val", args=args)

        # Adam with gradient clipping by value
        clip = F.clip.GradientClipByValue(max=args.clip_max_norm)
        optimizer = F.optimizer.AdamOptimizer(
            parameter_list=model.parameters(),
            learning_rate=args.lr,
            grad_clip=clip)

        # NOTE(review): these two readers are never used — train_one_epoch /
        # evaluate receive the datasets directly below. Either dead code or a
        # latent bug (the readers may have been meant to be passed instead);
        # confirm against train_one_epoch's signature before removing.
        dataset_train_reader = dataset_train.batch_reader(args.batch_size)
        dataset_val_reader = dataset_val.batch_reader(args.batch_size)

        output_dir = os.path.join(args.output_dir, args.dataset_file)

        # optionally resume from a pretrained dygraph checkpoint
        if args.resume:
            print("Loading pretrained model from path: " + args.resume)
            state_dict, _ = F.load_dygraph(args.resume)
            model.load_dict(state_dict)

        visualizer = Visualizer(postprocessors["bbox"], output_dir,
                                dataset_train.object_names, args)

        print("Start training")
        start_time = time.time()
        for epoch in range(args.start_epoch, args.epochs):
            train_stats = train_one_epoch(model, criterion, dataset_train,
                                          optimizer, visualizer, epoch,
                                          args.clip_max_norm, args)

            if args.output_dir:
                checkpoint_paths = [os.path.join(output_dir, 'checkpoint')]
                # extra checkpoint before LR drop and every 100 epochs
                if (epoch + 1) % 100 == 0:
                    checkpoint_paths.append(
                        os.path.join(output_dir, f"checkpoint{epoch:04}"))
                for checkpoint_path in checkpoint_paths:
                    F.save_dygraph(model.state_dict(), checkpoint_path)

            test_stats = evaluate(model, criterion, dataset_val, visualizer,
                                  args.output_dir, args)

            # merge train/test metrics into one flat JSON record per epoch
            log_stats = {
                **{f"train_{k}": v for k, v in train_stats.items()},
                **{f"test_{k}": v for k, v in test_stats.items()},
                "epoch": epoch,
            }

            if args.output_dir:
                with open(os.path.join(output_dir, "log.txt"), "a") as f:
                    f.write(json.dumps(log_stats) + "\n")

        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print("Training time{}".format(total_time_str))
"""Module-level setup: frozen config, visdom visualizer, torchnet metric
meters and CUDA device selection."""
# BUG FIX: `os` is used below (os.environ) but was never imported in this
# import group; re-importing is harmless if a wider file context already has it.
import os

from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.autograd import Variable as V
from torchnet import meter
from mmcv.runner import save_checkpoint, load_checkpoint

from config.config import cfg
from util.visualize import Visualizer
from util.show_masked_image import show_masked_image

# cfg.merge_from_file("config/un_att_pascal_0001.yaml")
cfg.freeze()  # freeze parameters (config becomes immutable)

vis = Visualizer(cfg.MODEL.NAME, port=8097)

# metric meters (average precision, mean AP, running loss average)
AP = meter.APMeter()
mAP = meter.mAPMeter()
Loss_meter = meter.AverageValueMeter()

# restrict the process to GPU 0
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def visualize_func(result):
    # intentional no-op placeholder for a visualization hook
    pass


def train_epoch(model, training_data, optimizer):
    '''Epoch operation in training phase.

    Resets the module-level meters and puts the model in training mode.
    NOTE(review): this definition continues beyond the visible chunk; only
    the visible prefix is reproduced here.
    '''
    AP.reset()
    mAP.reset()
    Loss_meter.reset()
    model.train()