def training_loop(dataset, batch_sizes, learning_rates, local_folder, epochs, solver_params, fit_params,
                  stem='', root='../models', phase_path='', annotator_path=''):
    # Training Loop
    for batch_size, lr in product(batch_sizes, learning_rates):
        # sub path
        sub_path = f'{local_folder}/'
        if phase_path != '':
            sub_path += f'{phase_path}/'
        if annotator_path != '':
            sub_path += f'{annotator_path}/'

        # For Documentation
        current_time = datetime.datetime.now(pytz.timezone('Europe/Berlin')).strftime("%Y%m%d-%H%M%S")
        hyperparams = {'batch': batch_size, 'lr': lr}
        writer = get_writer(path=f'../logs/{sub_path}', stem=stem, current_time=current_time, params=hyperparams)

        # Save model path
        if local_folder != '' and not os.path.exists('../models/' + sub_path):
            os.makedirs('../models/' + sub_path)
        path = '../models/'
        if local_folder != '':
            path += sub_path
        save_params = {'stem': stem, 'current_time': current_time, 'hyperparams': hyperparams}

        # Training
        solver = Solver(dataset, lr, batch_size, writer=writer, save_path_head=path, save_params=save_params,
                        **solver_params)
        model, f1 = solver.fit(**fit_params)

        # Save model
        model_path = get_model_path(path, stem, current_time, hyperparams, f1)
        torch.save(model.state_dict(), model_path + f'_epoch{epochs}.pt')
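
# Hedged sketch (not part of the snippet above): the get_writer and get_model_path
# helpers used in training_loop are not shown here. A minimal TensorBoard-based
# stand-in matching the calls above could look like this; the directory and file
# naming scheme is an assumption.
import os
from torch.utils.tensorboard import SummaryWriter


def get_writer(path, stem, current_time, params):
    # Encode run identity (stem, timestamp, hyperparameters) in the log dir name.
    run_name = f"{stem}_{current_time}_" + "_".join(f"{k}{v}" for k, v in params.items())
    return SummaryWriter(log_dir=os.path.join(path, run_name))


def get_model_path(path, stem, current_time, hyperparams, f1):
    # Mirror the writer naming so checkpoints and logs can be matched up.
    run_name = f"{stem}_{current_time}_" + "_".join(f"{k}{v}" for k, v in hyperparams.items())
    return os.path.join(path, f"{run_name}_f1{f1:.3f}")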
def main():
    parser = argparse.ArgumentParser(description='Proxyless-NAS augment')
    parser.add_argument('-n', '--name', type=str, required=True, help="name of the model")
    parser.add_argument('-c', '--config', type=str, default='./config/default.yaml', help="yaml config file")
    parser.add_argument('-p', '--chkpt', type=str, default=None, help="path of checkpoint pt file")
    parser.add_argument('-d', '--device', type=str, default="all", help="override device ids")
    parser.add_argument('-g', '--genotype', type=str, default=None, help="override genotype file")
    args = parser.parse_args()

    hp = HParam(args.config)

    pt_path = os.path.join('.', hp.log.chkpt_dir)
    out_dir = os.path.join(pt_path, args.name)
    os.makedirs(out_dir, exist_ok=True)

    log_dir = os.path.join('.', hp.log.log_dir)
    log_dir = os.path.join(log_dir, args.name)
    os.makedirs(log_dir, exist_ok=True)

    logger = utils.get_logger(log_dir, args.name)
    if utils.check_config(hp, args.name):
        raise Exception("Config error.")

    writer = utils.get_writer(log_dir, hp.log.writer)

    dev, dev_list = utils.init_device(hp.device, args.device)

    trn_loader = load_data(hp.augment.data, validation=False)
    val_loader = load_data(hp.augment.data, validation=True)

    gt.set_primitives(hp.genotypes)

    # load genotype
    genotype = utils.get_genotype(hp.augment, args.genotype)

    model, arch = get_model(hp.model, dev, dev_list, genotype)

    augment(out_dir, args.chkpt, trn_loader, val_loader, model, writer, logger, dev, hp.augment)
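
# Hedged usage sketch: a typical invocation of the entry point above. The script
# filename is an assumption; the flags and defaults come from the parser itself.
#
#   python augment.py -n my_run -c ./config/default.yaml -d all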
def __init__(self, config):
    # Environment
    # ===================================================================
    self.config = config
    self.device = config["train"]["device"] if torch.cuda.is_available() else "cpu"

    # Dataset
    # ===================================================================
    train_dataset, query_dataset, base_dataset, alphabet_len, max_str_len = \
        get_dataset(path_to_dataset=config["dataset"]["path_to_dataset"],
                    training_set_num=config["dataset"]["training_set_num"],
                    query_set_num=config["dataset"]["query_set_num"],
                    neighbor_num=config["dataset"]["neighbor_num"])
    self.train_loader = DataLoader(dataset=train_dataset,
                                   batch_size=config["dataloader"]["batch_size"],
                                   num_workers=config["dataloader"]["num_workers"],
                                   shuffle=True)
    self.query_loader = DataLoader(dataset=query_dataset,
                                   batch_size=config["dataloader"]["batch_size"],
                                   num_workers=config["dataloader"]["num_workers"],
                                   shuffle=False)
    self.base_loader = DataLoader(dataset=base_dataset,
                                  batch_size=config["dataloader"]["batch_size"],
                                  num_workers=config["dataloader"]["num_workers"],
                                  shuffle=False)

    # Model
    # ===================================================================
    model_config = get_model_config(n_features=config["model"]["n_features"])
    model = Model(model_config, alphabet_len, max_str_len)
    self.model = model.to(self.device)

    # Optimizer
    # ===================================================================
    self.optimizer = optim.Adam(self.model.parameters(), lr=config["optimizer"]["lr"])

    # Loss Function
    # ===================================================================
    criterion = Criterion(config["criterion"]["alpha"])
    self.criterion = criterion.to(self.device)

    # Training State
    # ===================================================================
    self.current_epoch = -1
    self.current_acc = 0

    # Logger
    # ===================================================================
    self.writer = get_writer(config["train"]["logdir_tb"])
    get_logger(config["train"]["logdir"])
    self.losses = AverageMeter()
    self.triplet_losses = AverageMeter()
    self.appro_losses = AverageMeter()
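
# Hedged sketch: a config dict with the keys this __init__ reads. Only the key
# layout is taken from the code above; every concrete value is an illustrative
# assumption.
example_config = {
    "train": {"device": "cuda", "logdir_tb": "./runs", "logdir": "./logs"},
    "dataset": {"path_to_dataset": "./data", "training_set_num": 100000,
                "query_set_num": 1000, "neighbor_num": 100},
    "dataloader": {"batch_size": 64, "num_workers": 4},
    "model": {"n_features": 128},
    "optimizer": {"lr": 1e-3},
    "criterion": {"alpha": 0.1},
}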
def __init__(self, cfgs):
    save_dict = OrderedDict()
    save_dict["fold"] = cfgs["fold"]

    if cfgs["memo"] is not None:
        save_dict["memo"] = cfgs["memo"]  # 1,2,3

    specific_dir = ["{}-{}".format(key, save_dict[key]) for key in save_dict.keys()]

    cfgs["save_dir"] = os.path.join(
        cfgs["save_dir"],
        cfgs["model"]["meta"],
        cfgs["model"]["inputs"]["label"],
        "_".join(specific_dir),
    )
    # cfgs["save_dir"] = os.path.join(cfgs["save_dir"], "_".join(specific_dir))
    os.makedirs(cfgs["save_dir"], exist_ok=True)

    self.cfgs = cfgs
    self.cfgs_test = cfgs["model"]["test"]

    self.tb_writer = utils.get_writer(self.cfgs)
    self.txt_logger = utils.get_logger(self.cfgs)

    self.txt_logger.write("\n\n----test.py----")
    self.txt_logger.write("\n{}".format(datetime.datetime.now()))
    self.txt_logger.write("\n\nSave Directory: \n{}".format(self.cfgs["save_dir"]))
    self.txt_logger.write("\n\nConfigs: \n{}\n".format(self.cfgs))

    ####### MODEL
    # NOTE: No Multiple GPU Support for Test
    model = models.get_model(self.cfgs)
    self.device = torch.device("cuda:{}".format(self.cfgs["local_rank"]))
    self.model = model.to(self.device)
def translate(url, my_writer, blog_type: BlogType):
    # net request
    print("do request with url : " + url)
    context = ssl._create_unverified_context()
    req = request.Request(url)
    req.add_header("User-Agent", USER_AGENT)
    response = request.urlopen(req, context=context).read().decode('utf-8')
    soup = BeautifulSoup(response, 'html.parser')

    # write blog header
    utils.write_blog_header(soup, my_writer, blog_type)

    root_tag = utils.get_root_tag(soup, blog_type)
    if root_tag is None:
        print("root tag is None !")
        exit()

    root_processor = processorChainBuilder.build_tag_processor(my_writer)
    root_processor.check(root_tag)
    print("done !")


if __name__ == '__main__':
    url_param, output_dir, blog_type_param = utils.check_params()
    if not os.path.exists(output_dir) or not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    writer = utils.get_writer(output_dir, blog_type_param)
    translate(url_param, writer, blog_type_param)
def main(args):
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
        handlers=[
            logging.FileHandler(
                os.path.join(args.output_dir, "run.log"),
                mode="w",
                encoding="utf-8",
            )
        ],
    )
    logger.info("********** Configuration Arguments **********")
    for arg, value in sorted(vars(args).items()):
        logger.info(f"{arg}: {value}")
    logger.info("**************************************************")

    set_seed(args)

    # metric and label
    label_name = GLUE_PROCESSED[args.task_name][1]
    if label_name:
        label2id = dict(zip(label_name, range(len(label_name))))
    else:
        label2id = None
    metric_list = GLUE_METRICS[args.task_name]
    generate_max_length = label_length_map[args.task_name]

    writer = get_writer(args)

    # get model and tokenizer
    model = T5ForConditionalGeneration.from_pretrained(args.model_name_or_path)
    tokenizer = T5Tokenizer.from_pretrained(args.model_name_or_path)

    # get dataloader
    train_dataloader = get_train_dataloader(tokenizer, args)
    if args.task_name == "mnli":
        dev_dataloader_match = get_mnli_dev_dataloader(tokenizer, args, matched=True)
        dev_dataloader_mismatch = get_mnli_dev_dataloader(tokenizer, args, matched=False)
    else:
        dev_dataloader = get_dev_dataloader(tokenizer, args)

    num_update_steps_per_epoch = math.ceil(
        len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps > 0:
        args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
    else:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # get lr_scheduler
    lr_scheduler = get_scheduler(
        learning_rate=args.learning_rate,
        scheduler_type=args.scheduler_type,
        num_warmup_steps=args.warmup_steps if args.warmup_steps > 0 else args.warmup_radio,
        num_training_steps=args.max_train_steps,
    )

    total_batch_size = args.train_batch_size * args.gradient_accumulation_steps

    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]
    optimizer = AdamW(
        learning_rate=lr_scheduler,
        beta1=0.9,
        beta2=0.999,
        epsilon=args.adam_epsilon,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params,
    )

    if args.use_amp:
        scaler = GradScaler(init_loss_scaling=args.scale_loss)

    logger.info("********** Running training **********")
    logger.info(f" Num examples = {len(train_dataloader.dataset)}")
    logger.info(f" Num Epochs = {args.num_train_epochs}")
    logger.info(f" Instantaneous train batch size = {args.train_batch_size}")
    logger.info(f" Instantaneous eval batch size = {args.eval_batch_size}")
    logger.info(f" Total train batch size (w. accumulation) = {total_batch_size}")
    logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    logger.info(f" Total optimization steps = {args.max_train_steps}")

    progress_bar = tqdm(range(args.max_train_steps))
    global_steps = 0
    tr_loss, logging_loss = 0.0, 0.0

    for _ in range(args.num_train_epochs):
        for step, batch in enumerate(train_dataloader):
            model.train()
            with auto_cast(args.use_amp, custom_white_list=["layer_norm", "softmax"]):
                source_ids, source_mask, labels, target_mask = batch
                outputs = model(
                    input_ids=source_ids,
                    attention_mask=source_mask,
                    labels=labels,
                    decoder_attention_mask=target_mask,
                )
                loss = outputs[0] / args.gradient_accumulation_steps
                tr_loss += loss.item()

            if args.use_amp:
                scaler.scale(loss).backward()
            else:
                loss.backward()

            if (step % args.gradient_accumulation_steps == 0
                    or step == len(train_dataloader) - 1):
                if args.use_amp:
                    scaler.minimize(optimizer, loss)
                else:
                    optimizer.step()
                lr_scheduler.step()
                optimizer.clear_grad()
                progress_bar.update(1)
                global_steps += 1

                if args.logging_steps > 0 and global_steps % args.logging_steps == 0:
                    writer.add_scalar("lr", lr_scheduler.get_lr(), global_steps)
                    writer.add_scalar(
                        "loss",
                        (tr_loss - logging_loss) / args.logging_steps,
                        global_steps,
                    )
                    logger.info(
                        "global_steps {} - lr: {:.10f} loss: {:.10f}".format(
                            global_steps,
                            lr_scheduler.get_lr(),
                            (tr_loss - logging_loss) / args.logging_steps,
                        ))
                    logging_loss = tr_loss

                if args.save_steps > 0 and global_steps % args.save_steps == 0:
                    logger.info("********** Running evaluating **********")
                    logger.info(f"********** Step {global_steps} **********")
                    output_dir = os.path.join(args.output_dir, f"step-{global_steps}")
                    os.makedirs(output_dir, exist_ok=True)
                    if args.task_name == "mnli":
                        matched_results = evaluate(
                            model,
                            dev_dataloader_match,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in matched_results.items():
                            writer.add_scalar(f"eval/matched_{k}", v, global_steps)
                            logger.info(f" {k} = {v}")
                        mismatched_results = evaluate(
                            model,
                            dev_dataloader_mismatch,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in mismatched_results.items():
                            writer.add_scalar(f"eval/mismatched_{k}", v, global_steps)
                            logger.info(f" {k} = {v}")
                    else:
                        eval_results = evaluate(
                            model,
                            dev_dataloader,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in eval_results.items():
                            writer.add_scalar(f"eval/{k}", v, global_steps)
                            logger.info(f" {k} = {v}")
                    model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    logger.info("********** Evaluating Done **********")

                if global_steps >= args.max_train_steps:
                    logger.info("********** Running evaluating **********")
                    logger.info(f"********** Step {global_steps} **********")
                    output_dir = os.path.join(args.output_dir, f"step-{global_steps}")
                    os.makedirs(output_dir, exist_ok=True)
                    if args.task_name == "mnli":
                        matched_results = evaluate(
                            model,
                            dev_dataloader_match,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in matched_results.items():
                            writer.add_scalar(f"eval/matched_{k}", v, global_steps)
                            logger.info(f" {k} = {v}")
                        mismatched_results = evaluate(
                            model,
                            dev_dataloader_mismatch,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in mismatched_results.items():
                            writer.add_scalar(f"eval/mismatched_{k}", v, global_steps)
                            logger.info(f" {k} = {v}")
                    else:
                        eval_results = evaluate(
                            model,
                            dev_dataloader,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in eval_results.items():
                            writer.add_scalar(f"eval/{k}", v, global_steps)
                            logger.info(f" {k} = {v}")
                    model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    logger.info("********** Evaluating Done **********")
                    logger.info("********** Training Done **********")
                    return
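
# Hedged sketch: get_writer(args) is not defined in this snippet. In a Paddle
# project it might wrap VisualDL's LogWriter (or a TensorBoard-compatible
# writer); args.output_dir is the only field assumed here.
import os
from visualdl import LogWriter


def get_writer(args):
    # Scalars (lr, train loss, eval metrics) get logged under the run's output dir.
    return LogWriter(logdir=os.path.join(args.output_dir, "visualdl"))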
def __init__(self, cfgs):
    save_dict = OrderedDict()
    save_dict["fold"] = cfgs["fold"]

    if cfgs["memo"] is not None:
        save_dict["memo"] = cfgs["memo"]  # 1,2,3

    specific_dir = ["{}-{}".format(key, save_dict[key]) for key in save_dict.keys()]

    cfgs["save_dir"] = os.path.join(
        cfgs["save_dir"],
        # cfgs["model"]["meta"],
        # cfgs["model"]["inputs"]["label"],
        "_".join(specific_dir),
    )
    os.makedirs(cfgs["save_dir"], exist_ok=True)

    ####### CONFIGS
    self.cfgs = cfgs

    ####### Logging
    self.tb_writer = utils.get_writer(self.cfgs)
    self.txt_logger = utils.get_logger(self.cfgs)

    self.do_logging = True
    if len(self.cfgs["gpu"]) > 1:
        if dist.get_rank() != 0:
            self.do_logging = False

    if self.do_logging:
        self.txt_logger.write("\n\n----train.py----")
        self.txt_logger.write("\n{}".format(datetime.datetime.now()))
        self.txt_logger.write(
            "\n\nSave Directory: \n{}".format(self.cfgs["save_dir"])
        )
        self.txt_logger.write("\n\nConfigs: \n{}\n".format(self.cfgs))

    ####### MODEL
    model = models.get_model(self.cfgs)
    if len(self.cfgs["gpu"]) > 1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.device = torch.device("cuda:{}".format(self.cfgs["local_rank"]))
        self.model = model.to(self.device)
        self.model = DistributedDataParallel(
            self.model,
            device_ids=[self.cfgs["local_rank"]],
            output_device=self.cfgs["local_rank"],
        )
    else:
        self.device = torch.device("cuda:{}".format(self.cfgs["local_rank"]))
        self.model = model.to(self.device)

    ####### Data
    train_dataset = inputs.get_dataset(self.cfgs, mode="train")
    if len(self.cfgs["gpu"]) > 1:
        train_sampler = DistributedSampler(
            train_dataset,
            num_replicas=len(self.cfgs["gpu"]),
            rank=self.cfgs["local_rank"],
        )
    else:
        train_sampler = None

    self.train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=self.cfgs["batch_size"],
        num_workers=self.cfgs["num_workers"],
        pin_memory=True,
        drop_last=False,
        collate_fn=inputs.get_collater(),
        sampler=train_sampler,
    )

    # if self.do_logging:
    #     self.txt_logger.write("\nDataset: ")
    #     self.txt_logger.write(
    #         "\nTRAIN Abnormal/Normal: {}/{}".format(
    #             len(train_dataset.abnormal_meta_df),
    #             len(train_dataset.normal_meta_df),
    #         )
    #     )

    ####### Opts
    self.optimizer = opts.get_optimizer(self.cfgs, self.model.parameters())
    self.scheduler = opts.get_scheduler(self.cfgs, self.optimizer)
    self.grad_scaler = GradScaler(enabled=self.cfgs["use_amp"])

    ####### Validator
    self.validator = Validator(self.cfgs, self.device)
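
# Hedged usage note: with more than one GPU this __init__ assumes a
# torch.distributed process group already exists (dist.get_rank() is called)
# and that cfgs["local_rank"] identifies the process's GPU. One process per
# GPU would typically be launched with, e.g.:
#
#   torchrun --nproc_per_node=2 train.py ...
#
# with each process reading its LOCAL_RANK environment variable into
# cfgs["local_rank"]; the exact launch command is an assumption.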
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device", type=int, help="gpu id")
parser.add_argument("-n", "--log", type=str, help="name of log folder")
parser.add_argument("-p", "--hparams", type=str, help="hparams config file")
opts = parser.parse_args()

# Get CUDA/CPU device
device = get_device(opts.device)

print('Loading data..')
hparams = load_json('./configs', opts.hparams)
dataset_a, dataset_b = get_datasets(**hparams['dataset'])
loader_a = DataLoader(dataset_a, **hparams['loading'])
loader_b = DataLoader(dataset_b, **hparams['loading'])

model = TravelGAN(hparams['model'], device=device)
writer, monitor = get_writer(opts.log)

print('Start training..')
for epoch in range(hparams['n_epochs']):
    # Run one epoch
    dis_losses, gen_losses = [], []
    for x_a, x_b in zip(loader_a, loader_b):
        # Loading on device
        x_a = x_a.to(device, non_blocking=True)
        x_b = x_b.to(device, non_blocking=True)

        # Calculate losses and update weights
        dis_loss = model.dis_update(x_a, x_b)
        gen_loss = model.gen_update(x_a, x_b)
        dis_losses.append(dis_loss)
        gen_losses.append(gen_loss)
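
    # Hedged sketch (not in the original snippet): one way the epoch could be
    # summarised afterwards, assuming writer exposes a TensorBoard-style
    # add_scalar; what monitor does is not shown above and is not used here.
    mean_dis = sum(dis_losses) / max(len(dis_losses), 1)
    mean_gen = sum(gen_losses) / max(len(gen_losses), 1)
    writer.add_scalar('loss/discriminator', mean_dis, epoch)
    writer.add_scalar('loss/generator', mean_gen, epoch)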