def package_pipeline(ci, repository, branch, upload=None, lockfile=None):
    """Build and package a Conan recipe from a git repository in a fresh job cache.

    Creates an isolated CONAN_USER_HOME for the job, configures revisions and
    the ``master`` remote, clones *repository* at *branch*, builds with CMake
    and runs ``conan export-pkg`` under a lockfile, optionally uploading.

    Args:
        ci: CI helper exposing ``new_job()`` and ``run(cmd)``.
        repository: URL/path of the git repository to clone.
        branch: Branch or ref to check out.
        upload: Optional remote name; when set, built packages are uploaded.
        lockfile: Optional lockfile contents; when ``None`` a fresh lock is
            created with ``conan graph lock``.
    """
    job, job_folder = ci.new_job()
    job_folder += "_pkg"
    cache_folder = os.path.join(job_folder, "cache")
    os.makedirs(cache_folder, exist_ok=True)
    with setenv("CONAN_USER_HOME", cache_folder):
        ci.run("conan config set general.revisions_enabled=True")
        ci.run(
            "conan config set general.default_package_id_mode=recipe_revision_mode"
        )
        ci.run("conan remote remove conan-center")
        ci.run(
            "conan remote add master http://localhost:8081/artifactory/api/conan/ci-master -f"
        )
        ci.run("conan user admin -p=password -r=master")
        with chdir(job_folder):
            ci.run("git clone %s" % repository)
            repo_folder = os.path.basename(repository)
            with chdir(repo_folder):
                ci.run("git checkout %s" % branch)
                # BUG FIX: plain makedirs("build") raised FileExistsError when
                # the job was re-run in the same workspace; exist_ok matches
                # the cache_folder creation above.
                os.makedirs("build", exist_ok=True)
                with chdir("build"):
                    # This build is external to Conan
                    if lockfile:
                        save("conan.lock", lockfile)
                    else:
                        ci.run("conan graph lock ..")
                    ci.run("conan install .. --lockfile")
                    ci.run('cmake ../src -G "Visual Studio 15 Win64"')
                    ci.run('cmake --build . --config Release')
                    ci.run(
                        "conan export-pkg .. user/testing --ignore-dirty --lockfile"
                    )
                    if upload:
                        ci.run("conan upload * -r=%s --all --confirm" % upload)
def git_init(self, readme=False):
    """Initialize a git repository in the current folder and commit its contents.

    When *readme* is true, a placeholder ``readme.txt`` is written first so
    the initial commit is never empty.
    """
    if readme:
        save("readme.txt", "README")
    for command in (
            "git init .",
            "git config core.autocrlf false",
            "git add .",
            "git commit -m initial",
    ):
        self._run(command)
def save_only_best(epoch, model, optimizer, models_folder, logger,
                   train_loss, best_train_loss, val_loss, best_val_loss,
                   train_acc, best_train_acc, val_acc, best_val_acc):
    """Checkpoint the model for every tracked metric that improved this epoch.

    Losses improve by strictly decreasing, accuracies by strictly increasing.
    Each improvement is logged and saved under a metric-specific tag.

    Returns:
        Tuple of the (possibly updated) best values:
        (best_train_loss, best_val_loss, best_train_acc, best_val_acc).
    """
    # Best losses (lower is better).
    if train_loss < best_train_loss:
        logger.info(f'[Epoch {epoch:d}] saving best train loss model: {train_loss:.3f}')
        save(epoch, model, optimizer, models_folder, "best_train_loss")
        best_train_loss = train_loss
    if val_loss < best_val_loss:
        logger.info(f'[Epoch {epoch:d}] saving best validation loss model: {val_loss:.3f}')
        save(epoch, model, optimizer, models_folder, "best_val_loss")
        best_val_loss = val_loss
    # Best accuracies (higher is better).
    if train_acc > best_train_acc:
        logger.info(f'[Epoch {epoch:d}] saving best train accuracy model: {train_acc:.3f}')
        save(epoch, model, optimizer, models_folder, "best_train_acc")
        best_train_acc = train_acc
    if val_acc > best_val_acc:
        logger.info(f'[Epoch {epoch:d}] saving best validation accuracy model: {val_acc:.3f}')
        save(epoch, model, optimizer, models_folder, "best_val_acc")
        best_val_acc = val_acc
    return best_train_loss, best_val_loss, best_train_acc, best_val_acc
def post(self, positionId):
    """Persist the posted form data and report the outcome as JSON.

    The form fields are stringified and stored via ``save`` into
    ``self.COLLECTION`` with update semantics.
    """
    # Collect the request parameters as plain strings.
    data_dict = {}
    for key, value in request.form.items():
        data_dict[key] = str(value)
    # Persist the data; database errors are logged and reported to the client.
    try:
        data = save(data_dict, COLLECTION=self.COLLECTION, update=True)
    except Exception as e:
        current_app.logger.error(e)
        return jsonify(errno=RET.DBERR, errmsg='数据保存错误')
    # Report the result: saved successfully, or the record already existed.
    if data:
        return jsonify(errno=RET.OK, errmsg='数据保存成功', data=data)
    return jsonify(errno=RET.DATAEXIST, errmsg='数据已存在')
def main():
    """Train MyTransformer on the configured dataset, resuming from a checkpoint
    when ``args.exp_dir`` exists, and evaluate the best model on the test set.

    Returns:
        (train_loss_list, valid_loss_list): per-epoch mean losses.
    """
    import copy  # local import: only needed to snapshot the best model

    # Reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    x, y, x_val, y_val, x_test, y_test, date, close_price_true = \
        load_data(args.dataset_file, args.sheet_name, args.src_seq_len,
                  args.tgt_seq_len)
    train_set = MyDataset(x, y)
    valid_set = MyDataset(x_val, y_val)
    test_set = MyDataset(x_test, y_test)
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              shuffle=True, pin_memory=True, num_workers=16)
    valid_loader = DataLoader(valid_set, batch_size=args.eval_batch_size,
                              shuffle=False, pin_memory=True, num_workers=16)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False,
                             pin_memory=True, num_workers=16)

    # model setup
    model = MyTransformer(x.shape[2], args.encoder_nlayers, args.encoder_nhead,
                          args.d_model, args.nhid, args.decoder_nlayers,
                          args.decoder_nhead, args.dropout).to(args.device)
    if torch.cuda.device_count() > 1:
        # BUG FIX: the original passed logging-style "%" arguments to print(),
        # which printed the raw format string instead of the device count.
        print("Use %d GPUs !" % torch.cuda.device_count())
        model = nn.DataParallel(model)
    criterion = nn.MSELoss(reduction='sum').to(args.device)
    optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=0.9,
                                weight_decay=args.l2_reg)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs, eta_min=args.lr_min, last_epoch=-1)

    start_epoch = 0
    if os.path.exists(args.exp_dir):
        # Resume a previous run from its latest checkpoint.
        checkpoint_path = os.path.join(args.exp_dir, 'model')
        checkpoint_file = os.path.join(checkpoint_path, 'checkpoint.pt')
        print(f'=> resuming from {checkpoint_file}')
        assert os.path.exists(checkpoint_file), 'Error. No checkpoint file.'
        checkpoint = torch.load(checkpoint_file)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        log = utils.create_logger(os.path.join(args.exp_dir, 'log'))
        log.info(
            f'=> checkpoint {checkpoint_file} loaded, (epoch {start_epoch})')
    else:
        print(f'start new experiment')
        log_path, checkpoint_path = utils.create_exp_path('./exp')
        log = utils.create_logger(log_path)
        log.info('root experimental dir created: {}'.format('./exp'))
    args.log = log
    log.info('param size: {:5.4f} MB'.format(
        sum(np.prod(v.size()) for v in model.parameters()) / 1e6))
    log.info('use {0} to train'.format(args.device))
    log.info(args)

    best_val_loss = float("inf")
    best_model = model
    train_loss_list = []
    valid_loss_list = []
    for epoch in range(start_epoch + 1, args.epochs + 1):
        epoch_start_time = time.time()
        train_loss = train(model, optimizer, criterion, train_loader)
        valid_loss, _ = evaluate(model, criterion, valid_loader)
        train_loss_list.append(train_loss / len(train_set))
        valid_loss_list.append(valid_loss / len(valid_set))
        log.info('-' * 80)
        log.info(
            '| end of epoch {:3d} | time: {:5.2f}s | lr {:1.5f} | train mean loss {:5.7f} | valid mean loss {:5.7f} | '
            .format(epoch, (time.time() - epoch_start_time),
                    scheduler.get_last_lr()[0], train_loss / len(train_set),
                    valid_loss / len(valid_set)))
        log.info('-' * 80)
        if valid_loss < best_val_loss:
            best_val_loss = valid_loss
            # BUG FIX: "best_model = model" only aliased the live model, so
            # the final test always ran on the LAST epoch's weights; take a
            # real snapshot of the best-so-far model instead.
            best_model = copy.deepcopy(model)
            utils.save(checkpoint_path, args, model, optimizer, epoch,
                       is_best=True)
        else:
            utils.save(checkpoint_path, args, model, optimizer, epoch,
                       is_best=False)
        scheduler.step()

    test_loss, y_pred = evaluate(best_model, criterion, test_loader)
    mape_score, r_score, theil_score = utils.measure_all(
        y_pred, date, close_price_true)
    log.info('-' * 80)
    log.info(
        '| test loss {:5.7f} | mape_score: {:2.5f} | r_score {:2.5f} | theil_score {:2.5f} | '
        .format(test_loss / len(test_set), mape_score, r_score, theil_score))
    log.info('-' * 80)
    return train_loss_list, valid_loss_list
def training(model, train_dataloader, valid_dataloader, test_dataloader,
             model_cfg, fold_idx=1):
    """Train one cross-validation fold, with optional SWA after ``swa_start``.

    Validates each epoch, checkpoints via ``save`` when the dev AUC improves
    (or on the last epoch when ``cfg.final_train``), then plots the stats and
    optionally produces a submission.

    Args:
        model: the network to train (moved onto the detected device).
        train_dataloader / valid_dataloader / test_dataloader: fold loaders.
        model_cfg: configuration object (stored in the module-global
            ``model_config``, as the helpers expect).
        fold_idx: 1-based fold number; config/model are printed only for fold 1.
    """
    print("-------- ", str(fold_idx), " --------")
    global model_config
    model_config = model_cfg
    device = get_device()
    model.to(device)
    if fold_idx == 1:
        # Print configuration and model details once, for the first fold only.
        print('CONFIG: ')
        print([(v, getattr(model_config, v)) for v in dir(model_config)
               if v[:2] != "__"])
        print('MODEL: ', model)

    epochs = model_config.epochs
    if model_config.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(
            model.parameters(), lr=float(model_config.lr),
            eps=float(model_config.eps),
            weight_decay=float(model_config.weight_decay))
    elif model_config.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=float(model_config.lr))
    if model_config.scheduler == 'linear':
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=int(model_config.warmup_steps),
            num_training_steps=len(train_dataloader) * epochs)
    else:
        scheduler = None
    criterion = nn.BCEWithLogitsLoss()
    swa_model = AveragedModel(model)
    if model_config.swa_scheduler == 'linear':
        swa_scheduler = SWALR(optimizer, swa_lr=float(model_config.lr))
    else:
        swa_scheduler = CosineAnnealingLR(optimizer, T_max=100)

    print('TRAINING...')
    training_stats = []
    best_dev_auc = float('-inf')
    with tqdm(total=epochs, leave=False) as pbar:
        for epoch_i in range(0, epochs):
            if epoch_i >= int(model_config.swa_start):
                # SWA phase: train the base model, fold it into the average,
                # and validate the averaged model.
                update_bn(train_dataloader, swa_model)
                train_auc, train_acc, avg_train_loss = train(
                    model, train_dataloader, device, criterion, optimizer)
                swa_model.update_parameters(model)
                swa_scheduler.step()
                update_bn(valid_dataloader, swa_model)
                valid_auc, valid_acc, avg_dev_loss, dev_d = valid(
                    swa_model, valid_dataloader, device, criterion)
            else:
                train_auc, train_acc, avg_train_loss = train(
                    model, train_dataloader, device, criterion, optimizer,
                    scheduler=scheduler)
                valid_auc, valid_acc, avg_dev_loss, dev_d = valid(
                    model, valid_dataloader, device, criterion)
            if cfg.final_train:
                # Final training uses all data: dev metrics are meaningless.
                valid_auc = 0
                valid_acc = 0
                avg_dev_loss = 0
            add_stats(training_stats, avg_train_loss, avg_dev_loss, train_acc,
                      train_auc, valid_acc, valid_auc)
            # BUG FIX: the original used bitwise "&"/"|" here; "&" binds
            # tighter than "not", so "not cfg.final_train & (...)" negated the
            # wrong expression and saved on every epoch when final_train was
            # False.  Plain boolean operators express the intent correctly.
            should_save = (cfg.final_train and epoch_i == epochs - 1) or \
                          (not cfg.final_train and valid_auc > best_dev_auc)
            if should_save:
                best_dev_auc = valid_auc
                if epoch_i >= int(model_config.swa_start):
                    update_bn(test_dataloader, swa_model)
                    test_d = gen_test(swa_model, test_dataloader, device)
                    save(fold_idx, swa_model, optimizer, dev_d, test_d,
                         valid_auc)
                else:
                    test_d = gen_test(model, test_dataloader, device)
                    save(fold_idx, model, optimizer, dev_d, test_d, valid_auc)
            pbar.update(1)
    print('TRAINING COMPLETED')

    # Show training results
    col_names = [
        'train_loss', 'train_acc', 'train_auc', 'dev_loss', 'dev_acc',
        'dev_auc'
    ]
    training_stats = pd.DataFrame(training_stats, columns=col_names)
    print(training_stats.head(epochs))
    plot_training_results(training_stats, fold_idx)

    # If config, get best model and make submission
    if cfg.run['submission'] == True:
        make_submission(model, test_dataloader)
# print(data_collector_list)

def keep_unique(seq):
    """Return *seq* with duplicates removed, preserving first-seen order.

    PERF FIX: the original tracked seen items in a list, making each
    membership test O(n) and the whole scan O(n^2); a set gives O(1) lookups.
    NOTE(review): this assumes the items are hashable (scraped strings here)
    — confirm against the collector's payload.
    """
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]

# keep_unique(data_collector_list)

# Persist both the raw and the de-duplicated collector lists.
sub_dir = 'scrape/'
save(fname=sub_dir + 'data_collector_list_num_{}_{}'.format(start_itr, stop_itr),
     obj=data_collector_list)
save(fname=sub_dir + 'data_collector_list_unique_num_{}_{}'.format(start_itr, stop_itr),
     obj=keep_unique(data_collector_list))

# In[ ]:

print('Done saving!')

# In[ ]:

driver.close()

# In[ ]:
def train(args, generator: Generator, discriminator: Discriminator,
          feature_extractor: FeatureExtractor, photo_dataloader,
          edge_smooth_dataloader, animation_dataloader, checkpoint_dir=None):
    """CartoonGAN-style training in two phases.

    Phase 1 (initialization): train the generator on content loss only.
    Phase 2 (adversarial): alternate discriminator and generator updates with
    adversarial + content losses.  Progress is checkpointed every
    ``args.save_steps`` and logged to TensorBoard every ``args.logging_steps``;
    a KeyboardInterrupt saves a final checkpoint before returning.

    Args:
        args: namespace with device, lr, adam_beta, content_loss_weight,
            n_epochs, n_init_epoch, save_steps, logging_steps, batch_size.
        generator/discriminator: networks to train.
        feature_extractor: frozen network providing content features.
        *_dataloader: the three training data sources.
        checkpoint_dir: optional directory to resume from and save to.
    """
    tb_writter = SummaryWriter()

    gen_criterion = nn.BCELoss().to(args.device)
    disc_criterion = nn.BCELoss().to(args.device)
    content_criterion = nn.L1Loss().to(args.device)

    gen_optimizer = torch.optim.Adam(generator.parameters(), lr=args.lr,
                                     betas=(args.adam_beta, 0.999))
    disc_optimizer = torch.optim.Adam(discriminator.parameters(), lr=args.lr,
                                      betas=(args.adam_beta, 0.999))

    global_step = 0
    global_init_step = 0

    # The number of steps to skip when loading a checkpoint
    skipped_step = 0
    skipped_init_step = 0

    cur_epoch = 0
    cur_init_epoch = 0

    data_len = min(len(photo_dataloader), len(edge_smooth_dataloader),
                   len(animation_dataloader))

    if checkpoint_dir:
        try:
            checkpoint_dict = load(checkpoint_dir)
            generator.load_state_dict(checkpoint_dict['generator'])
            discriminator.load_state_dict(checkpoint_dict['discriminator'])
            gen_optimizer.load_state_dict(checkpoint_dict['gen_optimizer'])
            disc_optimizer.load_state_dict(checkpoint_dict['disc_optimizer'])
            global_step = checkpoint_dict['global_step']
            global_init_step = checkpoint_dict['global_init_step']
            cur_epoch = global_step // data_len
            cur_init_epoch = global_init_step // len(photo_dataloader)
            skipped_step = global_step % data_len
            skipped_init_step = global_init_step % len(photo_dataloader)
            logger.info("Start training with,")
            logger.info("In initialization step, epoch: %d, step: %d",
                        cur_init_epoch, skipped_init_step)
            logger.info("In main train step, epoch: %d, step: %d", cur_epoch,
                        skipped_step)
        except Exception:
            # BUG FIX: was a bare "except:", which also swallowed
            # KeyboardInterrupt/SystemExit.
            logger.info("Wrong checkpoint path")

    t_total = data_len * args.n_epochs
    t_init_total = len(photo_dataloader) * args.n_init_epoch

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num photo examples = %d", len(photo_dataloader))
    logger.info("  Num edge_smooth examples = %d", len(edge_smooth_dataloader))
    logger.info("  Num animation examples = %d", len(animation_dataloader))
    logger.info("  Num Epochs = %d", args.n_epochs)
    logger.info("  Total train batch size = %d", args.batch_size)
    logger.info("  Total optimization steps = %d", t_total)
    logger.info("  Num Init Epochs = %d", args.n_init_epoch)
    logger.info("  Total Init optimization steps = %d", t_init_total)
    logger.info("  Logging steps = %d", args.logging_steps)
    logger.info("  Save steps = %d", args.save_steps)

    init_phase = True
    try:
        generator.train()
        discriminator.train()
        global_init_loss = 0
        # --- Initialization Content loss
        mb = master_bar(range(cur_init_epoch, args.n_init_epoch))
        for init_epoch in mb:
            epoch_iter = progress_bar(photo_dataloader, parent=mb)
            for step, (photo, _) in enumerate(epoch_iter):
                if skipped_init_step > 0:
                    # BUG FIX: the original assigned "= -1" here, which only
                    # skipped a single batch instead of resuming at the
                    # checkpointed position.
                    skipped_init_step -= 1
                    continue
                photo = photo.to(args.device)
                gen_optimizer.zero_grad()
                x_features = feature_extractor((photo + 1) / 2).detach()
                Gx = generator(photo)
                Gx_features = feature_extractor((Gx + 1) / 2)
                content_loss = args.content_loss_weight * content_criterion(
                    Gx_features, x_features)
                content_loss.backward()
                gen_optimizer.step()
                global_init_loss += content_loss.item()
                global_init_step += 1

                if args.save_steps > 0 and global_init_step % args.save_steps == 0:
                    logger.info(
                        "Save Initialization Phase, init_epoch: %d, init_step: %d",
                        init_epoch, global_init_step)
                    save(checkpoint_dir, global_step, global_init_step,
                         generator, discriminator, gen_optimizer,
                         disc_optimizer)
                if args.logging_steps > 0 and global_init_step % args.logging_steps == 0:
                    tb_writter.add_scalar('Initialization Phase/Content Loss',
                                          content_loss.item(),
                                          global_init_step)
                    tb_writter.add_scalar(
                        'Initialization Phase/Global Generator Loss',
                        global_init_loss / global_init_step, global_init_step)
                    logger.info(
                        "Initialization Phase, Epoch: %d, Global Step: %d, Content Loss: %.4f",
                        init_epoch, global_init_step,
                        global_init_loss / (global_init_step))
        # -----------------------------------------------------
        logger.info("Finish Initialization Phase, save model...")
        save(checkpoint_dir, global_step, global_init_step, generator,
             discriminator, gen_optimizer, disc_optimizer)

        init_phase = False
        global_loss_D = 0
        global_loss_G = 0
        global_loss_content = 0
        mb = master_bar(range(cur_epoch, args.n_epochs))
        for epoch in mb:
            epoch_iter = progress_bar(list(
                zip(animation_dataloader, edge_smooth_dataloader,
                    photo_dataloader)), parent=mb)
            for step, ((animation, _), (edge_smoothed, _),
                       (photo, _)) in enumerate(epoch_iter):
                if skipped_step > 0:
                    # BUG FIX: same "= -1" resume bug as the init phase.
                    skipped_step -= 1
                    continue
                animation = animation.to(args.device)
                edge_smoothed = edge_smoothed.to(args.device)
                photo = photo.to(args.device)

                disc_optimizer.zero_grad()
                # --- Train discriminator
                # ------ Train Discriminator with animation image
                animation_disc = discriminator(animation)
                animation_target = torch.ones_like(animation_disc)
                loss_animation_disc = disc_criterion(animation_disc,
                                                     animation_target)
                # ------ Train Discriminator with edge image
                edge_smoothed_disc = discriminator(edge_smoothed)
                edge_smoothed_target = torch.zeros_like(edge_smoothed_disc)
                loss_edge_disc = disc_criterion(edge_smoothed_disc,
                                                edge_smoothed_target)
                # ------ Train Discriminator with generated image
                generated_image = generator(photo).detach()
                generated_image_disc = discriminator(generated_image)
                generated_image_target = torch.zeros_like(generated_image_disc)
                loss_generated_disc = disc_criterion(generated_image_disc,
                                                     generated_image_target)

                loss_disc = loss_animation_disc + loss_edge_disc + loss_generated_disc
                loss_disc.backward()
                disc_optimizer.step()
                global_loss_D += loss_disc.item()

                # --- Train Generator
                gen_optimizer.zero_grad()
                generated_image = generator(photo)
                generated_image_disc = discriminator(generated_image)
                generated_image_target = torch.ones_like(generated_image_disc)
                loss_adv = gen_criterion(generated_image_disc,
                                         generated_image_target)
                # ------ Train Generator with content loss
                x_features = feature_extractor((photo + 1) / 2).detach()
                Gx_features = feature_extractor((generated_image + 1) / 2)
                loss_content = args.content_loss_weight * content_criterion(
                    Gx_features, x_features)
                loss_gen = loss_adv + loss_content
                loss_gen.backward()
                gen_optimizer.step()
                global_loss_G += loss_adv.item()
                global_loss_content += loss_content.item()
                global_step += 1

                if args.save_steps > 0 and global_step % args.save_steps == 0:
                    logger.info("Save Training Phase, epoch: %d, step: %d",
                                epoch, global_step)
                    save(checkpoint_dir, global_step, global_init_step,
                         generator, discriminator, gen_optimizer,
                         disc_optimizer)
                # BUG FIX: this logging was gated on global_init_step, which
                # no longer advances in the adversarial phase, so logging
                # fired on a stale cadence; gate on global_step instead.
                if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    tb_writter.add_scalar('Train Phase/Generator Loss',
                                          loss_adv.item(), global_step)
                    tb_writter.add_scalar('Train Phase/Discriminator Loss',
                                          loss_disc.item(), global_step)
                    tb_writter.add_scalar('Train Phase/Content Loss',
                                          loss_content.item(), global_step)
                    tb_writter.add_scalar('Train Phase/Global Generator Loss',
                                          global_loss_G / global_step,
                                          global_step)
                    tb_writter.add_scalar(
                        'Train Phase/Global Discriminator Loss',
                        global_loss_D / global_step, global_step)
                    tb_writter.add_scalar('Train Phase/Global Content Loss',
                                          global_loss_content / global_step,
                                          global_step)
                    logger.info(
                        "Training Phase, Epoch: %d, Global Step: %d, Disc Loss %.4f, Gen Loss %.4f, Content Loss: %.4f",
                        epoch, global_step, global_loss_D / global_step,
                        global_loss_G / global_step,
                        global_loss_content / global_step)
    except KeyboardInterrupt:
        # Save a final checkpoint so the interrupted run can be resumed.
        if init_phase:
            logger.info("KeyboardInterrupt in Initialization Phase!")
            logger.info("Save models, init_epoch: %d, init_step: %d",
                        init_epoch, global_init_step)
        else:
            logger.info("KeyboardInterrupt in Training Phase!")
            logger.info("Save models, epoch: %d, step: %d", epoch, global_step)
        save(checkpoint_dir, global_step, global_init_step, generator,
             discriminator, gen_optimizer, disc_optimizer)
# Pix2Pix/SGAN training script: UNet generator vs. 70x70 PatchGAN discriminator.
import torch
import torch.nn as nn
from torchvision import transforms
from dataset import Dataset
from architectures.baseline.unet import UNet_G
from architectures.virtual.virtual_unet import Virtual_UNet_G
from architectures.baseline.discriminator import PatchGan_D_70x70
from trainers.trainer import Trainer
from utils.utils import save, load

# Data locations: [input, target] folder pairs.
train_dir_name = ['data/file/train/input', 'data/file/train/target']
val_dir_name = ['data/file/val/input', 'data/file/val/target']
# Hyper-parameters: learning rates, batch size, channel counts.
lr_D, lr_G, bs = 0.0002, 0.0002, 8
ic, oc, use_sigmoid = 1, 1, False
norm_type = 'instancenorm'

train_data = Dataset(train_dir_name, basic_types='CycleGan', shuffle=True,
                     single_channel=False)
val_data = Dataset(val_dir_name, basic_types='Pix2Pix', shuffle=False,
                   single_channel=False)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
netD = PatchGan_D_70x70(ic, oc, use_sigmoid, norm_type, use_sn=False).to(device)
netG = UNet_G(ic, oc, use_bn=True, use_sn=False, norm_type=norm_type).to(device)

trn_dl = train_data.get_loader(None, bs)
# Use the first validation batch (size 3) as a fixed preview set.
val_dl = list(val_data.get_loader(None, 3))[0]

# BUG FIX: the original had a stray "):" after this call, which was a
# syntax error and prevented the script from running at all.
trainer = Trainer('SGAN', netD, netG, device, trn_dl, val_dl, lr_D=lr_D,
                  lr_G=lr_G, rec_weight=10, resample=True, weight_clip=None,
                  use_gradient_penalty=False, loss_interval=150,
                  image_interval=300, save_img_dir='saved_images/')

trainer.train(5)
save('saved/cur_state.state', netD, netG, trainer.optimizerD,
     trainer.optimizerG)
# In[ ]:

# print(data_collector_list)

def keep_unique(seq):
    """Return *seq* with duplicates removed, preserving first-seen order.

    PERF FIX: the original tracked seen items in a list, making each
    membership test O(n) and the whole scan O(n^2); a set gives O(1) lookups.
    NOTE(review): this assumes the items are hashable (scraped strings here)
    — confirm against the collector's payload.
    """
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]

data_collector_list_unique = keep_unique(data_collector_list)

# Persist the de-duplicated scrape results.
sub_dir = 'scrape/'
save(fname=sub_dir + 'wikipedia_raw_data_collector_num_{}_{}'.format(start_itr, stop_itr),
     obj=data_collector_list_unique)

# In[ ]:

print('Done saving!')

# In[ ]:

driver.close()

# In[ ]:
# Progressive SPADE CycleGAN-style training script: two PatchGAN
# discriminators (one per domain) and two fade-in SPADE generators.
import torch
import torch.nn as nn
from torchvision import transforms
from dataset import Dataset
from architectures.progressive.spade_fade import SPADE_G_Fade
from architectures.progressive.spade_freeze import SPADE_G_Freeze
from architectures.baseline.discriminator import PatchGan_D_70x70_One_Input
from trainers.trainer_pro import Trainer
from utils.utils import save, load

# Data locations: [input, target] folder pairs.
train_dir_name = ['data/file/train/input', 'data/file/train/target']
val_dir_name = ['data/file/val/input', 'data/file/val/target']
# Hyper-parameters: learning rates, batch size, image size, channels.
lr_D, lr_G, bs = 0.0002, 0.0002, 8
sz, ic, oc, use_sigmoid = 256, 3, 3, False
norm_type = 'instancenorm'

train_data = Dataset(train_dir_name, basic_types = 'CycleGan', shuffle = True, single_channel = False)
val_data = Dataset(val_dir_name, basic_types = 'Pix2Pix', shuffle = False, single_channel = False)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# One discriminator per domain, generators for both directions (A->B, B->A).
netD_A = PatchGan_D_70x70_One_Input(ic, use_sigmoid, norm_type, use_sn = True).to(device)
netD_B = PatchGan_D_70x70_One_Input(oc, use_sigmoid, norm_type, use_sn = True).to(device)
netG_A2B = SPADE_G_Fade(ic, oc, sz, nz = 8).to(device)
netG_B2A = SPADE_G_Fade(oc, ic, sz, nz = 8).to(device)

# NOTE(review): 'saved_imges' looks like a typo for 'saved_images' — confirm
# whether the directory name is intentional before renaming it anywhere.
trainer = Trainer('SGAN', netD_A, netD_B, netG_A2B, netG_B2A, device, train_data, val_data, lr_D = lr_D, lr_G = lr_G, cycle_weight = 10, identity_weight = 5.0, ds_weight = 8, resample = True, weight_clip = None, use_gradient_penalty = False, loss_interval = 150, image_interval = 300, save_img_dir = 'saved_imges')

# NOTE(review): train() receives 4 epoch counts and 4 batch sizes but only 3
# fade factors — verify the expected argument lengths against Trainer.train.
trainer.train([50, 50, 50, 50], [0.5, 0.5, 0.5], [16, 8, 4, 2])
save('saved/cur_state.state', netD_A, netD_B, netG_A2B, netG_B2A, trainer.optimizerD_A, trainer.optimizerD_B, trainer.optimizerG)
def adversarial_domain(source_cnn, target_cnn, discriminator, source_loader,
                       target_loader, target_test_loader, criterion,
                       d_criterion, optimizer, d_optimizer, best_score,
                       best_class_score, epoch_i, logger, args=None):
    """Run one epoch of ADDA-style adversarial domain adaptation.

    Alternates a discriminator update (source vs. target features) with a
    target-encoder update (fool the discriminator + confidence-filtered
    pseudo-label loss).  Validates at scheduled iterations and checkpoints
    whenever the average accuracy improves.

    Returns:
        dict with 'd/loss', 'target/loss', 'best_score', 'best_class_score'
        and 'n_iters'.
    """
    source_cnn.eval()
    target_cnn.encoder.train()
    discriminator.train()
    losses, d_losses = AverageMeter(), AverageMeter()
    n_iters = min(len(source_loader), len(target_loader))
    # Evenly spaced iteration indices at which to run validation.
    valSteps = n_iters // args.num_val
    valStepsList = [valSteps + (x * valSteps) for x in range(args.num_val)]
    vals = valStepsList[:-1]
    source_iter, target_iter = iter(source_loader), iter(target_loader)
    for iter_i in range(n_iters):
        # BUG FIX: iterator.next() was removed in Python 3 (and from newer
        # PyTorch dataloader iterators); use the builtin next() instead.
        source_data, source_target = next(source_iter)
        (target_data, target_target, target_conf, target_domain,
         target_domain_conf) = next(target_iter)
        source_data = source_data.to(args.device)
        target_data = target_data.to(args.device)
        target_target = target_target.to(args.device)
        target_conf = target_conf.to(args.device)
        target_domain = target_domain.to(args.device)
        target_domain_conf = target_domain_conf.to(args.device)
        bs = source_data.size(0)

        D_input_source = source_cnn.encoder(source_data)
        D_input_target = target_cnn.encoder(target_data)
        D_target_source = torch.tensor([0] * bs,
                                       dtype=torch.long).to(args.device)
        D_target_target = torch.tensor([1] * bs,
                                       dtype=torch.long).to(args.device)

        # train Discriminator
        D_output_source = discriminator(D_input_source)
        D_output_target = discriminator(D_input_target)
        D_output = torch.cat([D_output_source, D_output_target], dim=0)
        D_target = torch.cat([D_target_source, D_target_target], dim=0)
        # NOTE(review): d_criterion is never used — the discriminator is
        # trained with `criterion` (callers pass the same object twice);
        # confirm before consolidating the parameters.
        d_loss = criterion(D_output, D_target)
        d_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()
        d_losses.update(d_loss.item(), bs)

        # train Target encoder: fool the discriminator (label target features
        # as "source") plus a pseudo-label loss on confident samples.
        D_input_target = target_cnn.encoder(target_data)
        D_output_target = discriminator(D_input_target)
        D_output_target_P = target_cnn.classifier(D_input_target)
        lossT = criterion(D_output_target, D_target_source)

        # Keep only samples whose pseudo-labels pass the confidence filters.
        validSource = (target_domain == 0) & (target_conf >= args.thr)
        validMaskSource = validSource.nonzero(as_tuple=False)[:, 0]
        validTarget = (target_domain == 1) & \
            (target_domain_conf <= args.thr_domain) & \
            (target_conf >= args.thr)
        validMaskTarget = validTarget.nonzero(as_tuple=False)[:, 0]
        validIndexes = torch.cat((validMaskSource, validMaskTarget), 0)
        lossP = criterion(D_output_target_P[validIndexes],
                          target_target[validIndexes])
        loss = lossT + args.lam * lossP
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.update(loss.item(), bs)

        if iter_i in vals:
            validation = validate(target_cnn, target_test_loader, criterion,
                                  args=args)
            clsNames = validation['classNames']
            is_best = (best_score is None
                       or validation['avgAcc'] > best_score)
            best_score = validation['avgAcc'] if is_best else best_score
            best_class_score = validation[
                'classAcc'] if is_best else best_class_score
            state_dict = {
                'model': target_cnn.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch_i,
                'val/acc': best_score,
            }
            save(args.logdir, state_dict, is_best)
            logger.info('Epoch_{} Iter_{}'.format(epoch_i, iter_i))
            for cls_idx, clss in enumerate(clsNames):
                logger.info('{}: {}'.format(clss,
                                            validation['classAcc'][cls_idx]))
            logger.info('Current val. acc.: {}'.format(validation['avgAcc']))
            logger.info('Best val. acc.: {}'.format(best_score))
            classWiseDict = {}
            for cls_idx, clss in enumerate(clsNames):
                classWiseDict[clss] = validation['classAcc'][cls_idx].item()
            # Restore train/eval modes that validate() may have changed.
            source_cnn.eval()
            target_cnn.encoder.train()
            discriminator.train()
    return {
        'd/loss': d_losses.avg,
        'target/loss': losses.avg,
        'best_score': best_score,
        'best_class_score': best_class_score,
        'n_iters': n_iters
    }
def train_target_cnnP_domain(source_cnn, target_cnn, discriminator, criterion,
                             optimizer, d_optimizer, source_train_loader,
                             target_train_loader, target_test_loader, logger,
                             args=None):
    """Drive the full adversarial domain-adaptation training.

    Runs ``adversarial_domain`` for ``args.epochs`` epochs, validates and
    checkpoints after every epoch, and prints a summary if interrupted.

    Returns:
        (best_score, best_class_score, clsNames)
    """
    validation = validate(source_cnn, target_test_loader, criterion, args=args)
    log_source = 'Source/Acc {:.3f} '.format(validation['avgAcc'])
    best_score = None
    best_class_score = None
    # BUG FIX: clsNames was only assigned inside the loop, so an interrupt
    # before the first validation made the return raise NameError.
    clsNames = []
    try:
        for epoch_i in range(1, 1 + args.epochs):
            start_time = time()
            training = adversarial_domain(
                source_cnn, target_cnn, discriminator, source_train_loader,
                target_train_loader, target_test_loader, criterion, criterion,
                optimizer, d_optimizer, best_score, best_class_score, epoch_i,
                logger, args=args)
            best_score = training['best_score']
            best_class_score = training['best_class_score']
            n_iters = training['n_iters']
            validation = validate(target_cnn, target_test_loader, criterion,
                                  args=args)
            clsNames = validation['classNames']
            log = 'Epoch {}/{} '.format(epoch_i, args.epochs)
            log += 'D/Loss {:.3f} Target/Loss {:.3f} '.format(
                training['d/loss'], training['target/loss'])
            log += '[Val] Target/Loss {:.3f} Target/Acc {:.3f} '.format(
                validation['loss'], validation['acc'])
            log += log_source
            log += 'Time {:.2f}s'.format(time() - start_time)
            logger.info(log)
            # save
            is_best = (best_score is None
                       or validation['avgAcc'] > best_score)
            best_score = validation['avgAcc'] if is_best else best_score
            best_class_score = validation[
                'classAcc'] if is_best else best_class_score
            state_dict = {
                'model': target_cnn.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch_i,
                'val/acc': best_score,
            }
            save(args.logdir, state_dict, is_best)
            for cls_idx, clss in enumerate(clsNames):
                logger.info('{}: {}'.format(clss,
                                            validation['classAcc'][cls_idx]))
            logger.info('Current val. acc.: {}'.format(validation['avgAcc']))
            logger.info('Best val. acc.: {}'.format(best_score))
            classWiseDict = {}
            for cls_idx, clss in enumerate(clsNames):
                classWiseDict[clss] = validation['classAcc'][cls_idx].item()
    except KeyboardInterrupt:
        # BUG FIX: the bound exception name ("as ke") was never used.
        logger.info('\n============ Summary ============= \n')
        logger.info('Classwise accuracies: {}'.format(best_class_score))
        logger.info('Best val. acc.: {}'.format(best_score))
    return best_score, best_class_score, clsNames
def main():
    """Train a fixed DARTS genotype network on KMNIST/K49 and checkpoint weights.

    Requires CUDA; exits with status 1 when no GPU is available.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Reproducibility and device setup.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # NOTE: eval() on a config-controlled string — acceptable only because
    # args.arch comes from a trusted configuration, never from user input.
    genotype = eval("core.genotypes.%s" % args.arch)
    model = Network(args.init_channels, args.input_channels, num_classes,
                    args.layers, args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Data augmentations
    train_transform, valid_transform = utils.data_transforms_Kuzushiji(args)

    # Dataset
    if args.set == "KMNIST":
        train_data = KMNIST(args.data_dir, True, train_transform)
        test_data = KMNIST(args.data_dir, False, valid_transform)
    elif args.set == "K49":
        train_data = K49(args.data_dir, True, train_transform)
        test_data = K49(args.data_dir, False, valid_transform)
    else:
        # BUG FIX: the original referenced args.dataset, which does not exist
        # (the option is args.set), so this error path itself crashed with
        # AttributeError instead of the intended ValueError.
        raise ValueError("Unknown Dataset %s" % args.set)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True, pin_memory=True,
                                              num_workers=2)
    valid_queue = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False, pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    best_acc = 0.0
    for epoch in range(args.epochs):
        logging.info('epoch %d/%d lr %e', epoch, args.epochs,
                     scheduler.get_last_lr()[0])

        genotype = eval("core.genotypes.%s" % args.arch)
        print('---------Genotype---------')
        logging.info(genotype)
        print('--------------------------')

        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('valid_acc %f, best_acc %f', valid_acc, best_acc)

        # BUG FIX: scheduler.step() used to run at the top of the loop, i.e.
        # before optimizer.step(); PyTorch >= 1.1 requires stepping the
        # scheduler after the optimizer, and get_lr() is deprecated in favor
        # of get_last_lr().
        scheduler.step()

        utils.save(model, os.path.join(log_path, 'weights.pt'))
def save(self):
    """Persist the model, summarize the training history, and save the logs."""
    # do not modify this section
    save(self.model)  # resolves to the module-level save(), not this method
    summarize(self.summary.history)
    self.logs.save()
def main(args):
    """Build and train the PEC dialogue model described by a YAML config.

    Loads embeddings and training data, builds the TF1 graph (placeholders,
    loss, clipped-gradient Adam step), optionally restores a checkpoint from
    ``restore_from``, then runs the training loop, printing train/dev
    perplexity every ``print_every`` steps and checkpointing every
    ``checkpoint_every`` steps (and once more on exit).

    Parameters
    ----------
    args : namespace with a ``config`` attribute
        Path to a YAML file whose ``configuration`` section drives everything.
    """
    # loading configurations
    with open(args.config) as f:
        config = yaml.safe_load(f)["configuration"]
    name = config["Name"]

    # Construct or load embeddings
    print("Initializing embeddings ...")
    vocab_size = config["embeddings"]["vocab_size"]
    embed_size = config["embeddings"]["embed_size"]
    per_num = config["embeddings"]["person_num"]
    per_embed_size = config["embeddings"]["person_embed_size"]
    ori_emb, ori_p_emb = load_embedding("model/emb.tsv")
    embeddings = init_embeddings(vocab_size, embed_size,
                                 initial_values=ori_emb, name=name)
    print("\tDone.")

    # Build the model and compute losses.
    # NOTE(review): sequence length 40, spectrogram shape (400, 200) and the
    # 1000-wide lexicon table are hard-coded here and must match the data
    # produced by loadfile()/load_spectrogram() — confirm before changing.
    source_ids = tf.placeholder(tf.int32, [None, 40], name="source")
    target_ids = tf.placeholder(tf.int32, [None, 40], name="target")
    person_ids = tf.placeholder(tf.int32, [None], name="person_ids")
    lexicons_ids = tf.placeholder(tf.int32, [per_num, 1000], name="lexicons_ids")
    spectrogram = tf.placeholder(tf.float32, [None, 400, 200], name="audio")
    sequence_mask = tf.placeholder(tf.bool, [None, 40], name="mask")
    choice_qs = tf.placeholder(tf.float32, [None, 40], name="choice")
    emo_cat = tf.placeholder(tf.int32, [None], name="emotion_category")
    is_train = tf.placeholder(tf.bool)

    (enc_num_layers, enc_num_units, enc_cell_type, enc_bidir,
     dec_num_layers, dec_num_units, dec_cell_type, state_pass,
     num_emo, emo_cat_units, emo_int_units, infer_batch_size,
     beam_size, max_iter, attn_num_units, l2_regularize,
     word_config, spectrogram_config, lstm_int_num, batch_size,
     loss_weight) = get_PEC_config(config)

    print("Building model architecture ...")
    CE, loss, cla_loss, train_outs, infer_outputs, score = compute_loss(
        source_ids, target_ids, sequence_mask, choice_qs, embeddings,
        enc_num_layers, enc_num_units, enc_cell_type, enc_bidir,
        dec_num_layers, dec_num_units, dec_cell_type, state_pass,
        num_emo, emo_cat, emo_cat_units, emo_int_units,
        infer_batch_size, spectrogram, word_config, per_num, person_ids,
        per_embed_size, spectrogram_config, loss_weight, lstm_int_num,
        is_train, False, lexicons_ids,
        beam_size, max_iter, attn_num_units, l2_regularize, name)
    print("\tDone.")

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    # NOTE(review): this tuple rebinds ``batch_size``, shadowing the value
    # unpacked from get_PEC_config above — the training-config value wins.
    (logdir, restore_from, learning_rate, gpu_fraction, max_checkpoints,
     train_steps, batch_size, print_every, checkpoint_every, s_filename,
     t_filename, q_filename, s_max_leng, t_max_leng,
     dev_s_filename, dev_t_filename, dev_q_filename,
     loss_fig, perp_fig, sp_filename, sp_max_leng,
     test_s_filename, test_t_filename, test_q_filename,
     test_output) = get_training_config(config, "training")

    is_overwritten_training = logdir != restore_from

    # Adam with global-norm gradient clipping at 5.0.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=1e-4)
    trainable = tf.trainable_variables()
    gradients = tf.gradients(loss, trainable)
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(gradients, 5.0)
    optim = optimizer.apply_gradients(zip(clipped_gradients, trainable))

    # Set up session
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                            gpu_options=gpu_options))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    # Include BatchNorm moving statistics (not trainable, but required at
    # inference time) alongside the trainable variables.
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()
    bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
    bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
    var_list += bn_moving_vars
    # BUG FIX: the Saver was constructed with var_list=tf.trainable_variables(),
    # discarding the list just assembled — BN moving_mean/moving_variance were
    # never saved or restored. Pass the augmented var_list instead.
    saver = tf.train.Saver(var_list=var_list, max_to_keep=max_checkpoints)

    # BN update ops must be run alongside the train op so the moving
    # statistics are refreshed each step.
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except Exception:
        print("Something went wrong while restoring checkpoint. "
              "Training is terminated to avoid the overwriting.")
        raise

    # ##### Training #####
    # Load data
    print("Loading data ...")
    # id_0, id_1, id_2 preserved for SOS, EOS, constant zero padding
    embed_shift = 3
    lexicons = load_lexicons() + embed_shift

    source_sentences_ids, source_person, source_data = loadfile(
        s_filename, is_dialog=True, is_source=True, max_length=s_max_leng)
    source_data += embed_shift
    target_sentences_ids, target_person, target_data, category_data = loadfile(
        t_filename, is_dialog=True, is_source=False, max_length=t_max_leng)
    target_data += embed_shift
    spectrogram_data = load_spectrogram(sp_filename, source_sentences_ids)

    choice_data = loadfile(q_filename, is_dialog=False, is_source=False,
                           max_length=t_max_leng)
    choice_data = choice_data.astype(np.float32)

    # Decoder-side mask: mark real tokens, shifted right by one so position 0
    # (the SOS step) is always on and the final position drops off.
    masks = (target_data >= embed_shift)
    masks = np.append(np.ones([len(masks), 1], dtype=bool), masks, axis=1)
    masks = masks[:, :-1]
    n_data = len(source_data)

    dev_source_data = None
    if dev_s_filename is not None:
        dev_source_sentences_ids, dev_source_person, dev_source_data = loadfile(
            dev_s_filename, is_dialog=True, is_source=True,
            max_length=s_max_leng)
        dev_source_data += embed_shift
        dev_target_sentences_ids, dev_target_person, dev_target_data, dev_category_data = loadfile(
            dev_t_filename, is_dialog=True, is_source=False,
            max_length=t_max_leng)
        dev_target_data += embed_shift
        dev_spectrogram_data = load_spectrogram(sp_filename,
                                                dev_source_sentences_ids)
        dev_choice_data = loadfile(dev_q_filename, is_dialog=False,
                                   is_source=False, max_length=t_max_leng)
        dev_choice_data[dev_choice_data < 0] = 0
        dev_choice_data = dev_choice_data.astype(np.float32)
        dev_masks = (dev_target_data >= embed_shift)
        dev_masks = np.append(np.ones([len(dev_masks), 1], dtype=bool),
                              dev_masks, axis=1)
        dev_masks = dev_masks[:, :-1]
    print("\tDone.")

    # Training
    last_saved_step = saved_global_step
    num_steps = saved_global_step + train_steps
    losses = []
    cla_losses = []
    steps = []
    perps = []
    dev_perps = []

    print("Start training ...")
    try:
        # Pre-bind so the finally-block checkpoint test is safe even if the
        # loop body never runs.
        step = last_saved_step
        for step in range(saved_global_step + 1, num_steps):
            start_time = time.time()

            # Sample a random (with replacement) mini-batch.
            rand_indexes = np.random.choice(n_data, batch_size)
            source_batch = source_data[rand_indexes]
            target_batch = target_data[rand_indexes]
            person_batch = target_person[rand_indexes]
            spectrogram_batch = spectrogram_data[rand_indexes]
            mask_batch = masks[rand_indexes]
            choice_batch = choice_data[rand_indexes]
            emotions = category_data[rand_indexes]

            feed_dict = {
                source_ids: source_batch,
                target_ids: target_batch,
                person_ids: person_batch,
                spectrogram: spectrogram_batch,
                sequence_mask: mask_batch,
                choice_qs: choice_batch,
                emo_cat: emotions,
                lexicons_ids: lexicons,
                is_train: True,
            }
            loss_value, cla_value, _, __ = sess.run(
                [loss, cla_loss, optim, extra_update_ops],
                feed_dict=feed_dict)
            losses.append(loss_value)
            cla_losses.append(cla_value)
            duration = time.time() - start_time

            if step % print_every == 0:
                # train perplexity
                t_perp = compute_perplexity(sess, CE, mask_batch, feed_dict)
                perps.append(t_perp)

                # dev perplexity, accumulated over the whole dev set in
                # batch_size chunks.
                dev_str = ""
                if dev_source_data is not None:
                    CE_words = N_words = 0.0
                    for start in range(0, len(dev_source_data), batch_size):
                        dev_feed_dict = {
                            source_ids: dev_source_data[start:start + batch_size],
                            target_ids: dev_target_data[start:start + batch_size],
                            person_ids: dev_target_person[start:start + batch_size],
                            spectrogram: dev_spectrogram_data[start:start + batch_size],
                            choice_qs: dev_choice_data[start:start + batch_size],
                            emo_cat: dev_category_data[start:start + batch_size],
                            sequence_mask: dev_masks[start:start + batch_size],
                            lexicons_ids: lexicons,
                            is_train: False,
                        }
                        CE_word, N_word = compute_test_perplexity(
                            sess, CE, dev_masks[start:start + batch_size],
                            dev_feed_dict)
                        CE_words += CE_word
                        N_words += N_word

                    dev_str = "dev_prep: {:.3f}, ".format(
                        np.exp(CE_words / N_words))
                    dev_perps.append(np.exp(CE_words / N_words))

                steps.append(step)
                info = 'step {:d}, loss = {:.6f}, cla_loss = {:.6f} '
                info += 'perp: {:.3f}, {}({:.3f} sec/step)'
                print(info.format(step, loss_value, cla_value, t_perp,
                                  dev_str, duration))

            if step % checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C so save message is on its own line.
        print()
    finally:
        # Persist any progress made since the last periodic checkpoint.
        if step > last_saved_step:
            save(saver, sess, logdir, step)