Example #1
def training_loop(dataset, batch_sizes, learning_rates, local_folder, epochs, solver_params,
                  fit_params, stem='', root='../models', phase_path='', annotator_path=''):

    # Training Loop
    for batch_size, lr in product(batch_sizes, learning_rates):
        # sub path
        sub_path = f'{local_folder}/'
        if phase_path != '':
            sub_path += f'{phase_path}/'
        if annotator_path != '':
            sub_path += f'{annotator_path}/'

        # For Documentation
        current_time = datetime.datetime.now(pytz.timezone('Europe/Berlin')).strftime("%Y%m%d-%H%M%S")
        hyperparams = {'batch': batch_size, 'lr': lr}
        writer = get_writer(path=f'../logs/{sub_path}', stem=stem,
                            current_time=current_time, params=hyperparams)

        # Save model path (honor the `root` argument instead of hard-coding '../models')
        if local_folder != '' and not os.path.exists(f'{root}/{sub_path}'):
            os.makedirs(f'{root}/{sub_path}')
        path = f'{root}/'
        if local_folder != '':
            path += sub_path
        save_params = {'stem': stem, 'current_time': current_time, 'hyperparams': hyperparams}

        # Training
        solver = Solver(dataset, lr, batch_size, writer=writer, save_path_head=path, save_params=save_params,
                        **solver_params)
        model, f1 = solver.fit(**fit_params)

        # Save model
        model_path = get_model_path(path, stem, current_time, hyperparams, f1)
        torch.save(model.state_dict(), model_path + f'_epoch{epochs}.pt')
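A minimal sketch of the get_writer helper this example assumes: it composes a run-specific log directory from the path, stem, timestamp, and hyperparameters and returns a TensorBoard SummaryWriter. The directory layout below is an assumption, not the original implementation.

from torch.utils.tensorboard import SummaryWriter

def get_writer(path, stem, current_time, params):
    # Hypothetical layout: <path><stem>_<time>_batch32_lr0.001
    suffix = '_'.join(f'{k}{v}' for k, v in params.items())
    return SummaryWriter(log_dir=f'{path}{stem}_{current_time}_{suffix}')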
Example #2
def main():
    parser = argparse.ArgumentParser(description='Proxyless-NAS augment')
    parser.add_argument('-n',
                        '--name',
                        type=str,
                        required=True,
                        help="name of the model")
    parser.add_argument('-c',
                        '--config',
                        type=str,
                        default='./config/default.yaml',
                        help="yaml config file")
    parser.add_argument('-p',
                        '--chkpt',
                        type=str,
                        default=None,
                        help="path of checkpoint pt file")
    parser.add_argument('-d',
                        '--device',
                        type=str,
                        default="all",
                        help="override device ids")
    parser.add_argument('-g',
                        '--genotype',
                        type=str,
                        default=None,
                        help="override genotype file")
    args = parser.parse_args()

    hp = HParam(args.config)

    pt_path = os.path.join('.', hp.log.chkpt_dir)
    out_dir = os.path.join(pt_path, args.name)
    os.makedirs(out_dir, exist_ok=True)

    log_dir = os.path.join('.', hp.log.log_dir)
    log_dir = os.path.join(log_dir, args.name)
    os.makedirs(log_dir, exist_ok=True)

    logger = utils.get_logger(log_dir, args.name)

    if utils.check_config(hp, args.name):
        raise Exception("Config error.")

    writer = utils.get_writer(log_dir, hp.log.writer)

    dev, dev_list = utils.init_device(hp.device, args.device)

    trn_loader = load_data(hp.augment.data, validation=False)
    val_loader = load_data(hp.augment.data, validation=True)

    gt.set_primitives(hp.genotypes)

    # load genotype
    genotype = utils.get_genotype(hp.augment, args.genotype)

    model, arch = get_model(hp.model, dev, dev_list, genotype)

    augment(out_dir, args.chkpt, trn_loader, val_loader, model, writer, logger,
            dev, hp.augment)
Example #3
    def __init__(self, config):
        # Environment
        # ===================================================================
        self.config = config
        self.device = config["train"]["device"] if torch.cuda.is_available() else "cpu"

        # Dataset
        # ===================================================================
        train_dataset, query_dataset, base_dataset, alphabet_len, max_str_len = \
                get_dataset(path_to_dataset=config["dataset"]["path_to_dataset"],
                            training_set_num=config["dataset"]["training_set_num"], 
                            query_set_num=config["dataset"]["query_set_num"],
                            neighbor_num=config["dataset"]["neighbor_num"])

        self.train_loader = DataLoader(dataset=train_dataset,
                                       batch_size=config["dataloader"]["batch_size"],
                                       num_workers=config["dataloader"]["num_workers"],
                                       shuffle=True)
        self.query_loader = DataLoader(dataset=query_dataset,
                                       batch_size=config["dataloader"]["batch_size"],
                                       num_workers=config["dataloader"]["num_workers"],
                                       shuffle=False)
        self.base_loader = DataLoader(dataset=base_dataset,
                                      batch_size=config["dataloader"]["batch_size"],
                                      num_workers=config["dataloader"]["num_workers"],
                                      shuffle=False)

        # Model
        # ===================================================================
        model_config = get_model_config(n_features=config["model"]["n_features"])
        model = Model(model_config, alphabet_len, max_str_len)
        self.model = model.to(self.device)

        # Optimizer
        # ===================================================================
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=config["optimizer"]["lr"])

        # Loss Function
        # ===================================================================
        criterion = Criterion(config["criterion"]["alpha"])
        self.criterion = criterion.to(self.device)

        # Training State
        # ===================================================================
        self.current_epoch = -1
        self.current_acc = 0

        # Logger
        # ===================================================================
        self.writer = get_writer(config["train"]["logdir_tb"])
        get_logger(config["train"]["logdir"])
        self.losses = AverageMeter()
        self.triplet_losses = AverageMeter()
        self.appro_losses = AverageMeter()
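Example #3 accumulates loss statistics with AverageMeter, which is not shown. A typical implementation of this common pattern (a sketch, not the project's own class):

class AverageMeter:
    """Keeps a running sum and average of a scalar such as a loss."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n

    @property
    def avg(self):
        return self.sum / max(self.count, 1)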
Example #4
    def __init__(self, cfgs):

        save_dict = OrderedDict()
        save_dict["fold"] = cfgs["fold"]
        if cfgs["memo"] is not None:
            save_dict["memo"] = cfgs["memo"]  # 1,2,3

        specific_dir = [
            "{}-{}".format(key, save_dict[key]) for key in save_dict.keys()
        ]

        cfgs["save_dir"] = os.path.join(
            cfgs["save_dir"],
            cfgs["model"]["meta"],
            cfgs["model"]["inputs"]["label"],
            "_".join(specific_dir),
        )

        # cfgs["save_dir"] = os.path.join(cfgs["save_dir"], "_".join(specific_dir))
        os.makedirs(cfgs["save_dir"], exist_ok=True)

        self.cfgs = cfgs
        self.cfgs_test = cfgs["model"]["test"]

        self.tb_writer = utils.get_writer(self.cfgs)
        self.txt_logger = utils.get_logger(self.cfgs)

        self.txt_logger.write("\n\n----test.py----")
        self.txt_logger.write("\n{}".format(datetime.datetime.now()))
        self.txt_logger.write("\n\nSave Directory: \n{}".format(
            self.cfgs["save_dir"]))
        self.txt_logger.write("\n\nConfigs: \n{}\n".format(self.cfgs))

        ####### MODEL
        # NOTE: No Multiple GPU Support for Test
        model = models.get_model(self.cfgs)
        self.device = torch.device("cuda:{}".format(self.cfgs["local_rank"]))
        self.model = model.to(self.device)
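Here utils.get_writer receives the whole cfgs dict rather than a path (Example #7 below does the same). A minimal sketch, assuming the TensorBoard events are written under cfgs["save_dir"]; the subdirectory name is an assumption:

import os
from torch.utils.tensorboard import SummaryWriter

def get_writer(cfgs):
    # Assumed layout: event files stored next to the checkpoints
    return SummaryWriter(log_dir=os.path.join(cfgs["save_dir"], "tb"))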
Example #5

def translate(url, my_writer, blog_type: BlogType):
    # network request
    print("requesting url: " + url)
    context = ssl._create_unverified_context()
    req = request.Request(url)
    req.add_header("User-Agent", USER_AGENT)
    response = request.urlopen(req, context=context).read().decode('utf-8')
    soup = BeautifulSoup(response, 'html.parser')

    # write blog header
    utils.write_blog_header(soup, my_writer, blog_type)

    root_tag = utils.get_root_tag(soup, blog_type)
    if root_tag is None:
        print("root tag is None !")
        exit()
    root_processor = processorChainBuilder.build_tag_processor(my_writer)
    root_processor.check(root_tag)

    print("done !")


if __name__ == '__main__':
    url_param, output_dir, blog_type_param = utils.check_params()
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir, exist_ok=True)
    writer = utils.get_writer(output_dir, blog_type_param)
    translate(url_param, writer, blog_type_param)
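Unlike the TensorBoard-style writers in the other examples, the writer here receives text extracted from HTML and persists it, presumably as Markdown. A sketch of such a file-backed writer; the class name and methods are assumptions:

import os

class FileWriter:
    """Hypothetical file-backed writer for the converted blog content."""

    def __init__(self, output_dir, name):
        self._file = open(os.path.join(output_dir, f'{name}.md'),
                          'w', encoding='utf-8')

    def write(self, text):
        self._file.write(text)

    def close(self):
        self._file.close()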
Example #6
def main(args):
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
        handlers=[
            logging.FileHandler(
                os.path.join(args.output_dir, "run.log"),
                mode="w",
                encoding="utf-8",
            )
        ],
    )
    logger.info("**********  Configuration Arguments **********")
    for arg, value in sorted(vars(args).items()):
        logger.info(f"{arg}: {value}")
    logger.info("**************************************************")
    set_seed(args)

    # metric and label
    label_name = GLUE_PROCESSED[args.task_name][1]
    if label_name:
        label2id = dict(zip(label_name, range(len(label_name))))
    else:
        label2id = None
    metric_list = GLUE_METRICS[args.task_name]
    generate_max_length = label_length_map[args.task_name]

    writer = get_writer(args)

    # get model and tokenizer
    model = T5ForConditionalGeneration.from_pretrained(args.model_name_or_path)
    tokenizer = T5Tokenizer.from_pretrained(args.model_name_or_path)

    # get dataloader
    train_dataloader = get_train_dataloader(tokenizer, args)
    if args.task_name == "mnli":
        dev_dataloader_match = get_mnli_dev_dataloader(tokenizer,
                                                       args,
                                                       matched=True)
        dev_dataloader_mismatch = get_mnli_dev_dataloader(tokenizer,
                                                          args,
                                                          matched=False)
    else:
        dev_dataloader = get_dev_dataloader(tokenizer, args)

    num_update_steps_per_epoch = math.ceil(
        len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps > 0:
        args.num_train_epochs = math.ceil(args.max_train_steps /
                                          num_update_steps_per_epoch)
    else:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # get lr_scheduler
    lr_scheduler = get_scheduler(
        learning_rate=args.learning_rate,
        scheduler_type=args.scheduler_type,
        num_warmup_steps=args.warmup_steps
        if args.warmup_steps > 0 else args.warmup_radio,
        num_training_steps=args.max_train_steps,
    )

    total_batch_size = args.train_batch_size * args.gradient_accumulation_steps

    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]

    optimizer = AdamW(
        learning_rate=lr_scheduler,
        beta1=0.9,
        beta2=0.999,
        epsilon=args.adam_epsilon,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params,
    )

    if args.use_amp:
        scaler = GradScaler(init_loss_scaling=args.scale_loss)

    logger.info("********** Running training **********")
    logger.info(f"  Num examples = {len(train_dataloader.dataset)}")
    logger.info(f"  Num Epochs = {args.num_train_epochs}")
    logger.info(f"  Instantaneous train batch size = {args.train_batch_size}")
    logger.info(f"  Instantaneous eval batch size = {args.eval_batch_size}")
    logger.info(
        f"  Total train batch size (w. accumulation) = {total_batch_size}")
    logger.info(
        f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    logger.info(f"  Total optimization steps = {args.max_train_steps}")

    progress_bar = tqdm(range(args.max_train_steps))

    global_steps = 0
    tr_loss, logging_loss = 0.0, 0.0

    for _ in range(args.num_train_epochs):
        for step, batch in enumerate(train_dataloader):
            model.train()
            with auto_cast(args.use_amp,
                           custom_white_list=["layer_norm", "softmax"]):
                source_ids, source_mask, labels, target_mask = batch
                outputs = model(
                    input_ids=source_ids,
                    attention_mask=source_mask,
                    labels=labels,
                    decoder_attention_mask=target_mask,
                )
                loss = outputs[0] / args.gradient_accumulation_steps
                tr_loss += loss.item()

            if args.use_amp:
                scaler.scale(loss).backward()
            else:
                loss.backward()

            # update every `gradient_accumulation_steps` micro-batches (and at epoch end)
            if ((step + 1) % args.gradient_accumulation_steps == 0
                    or step == len(train_dataloader) - 1):
                if args.use_amp:
                    scaler.minimize(optimizer, loss)
                else:
                    optimizer.step()

                lr_scheduler.step()
                optimizer.clear_grad()
                progress_bar.update(1)
                global_steps += 1

                if args.logging_steps > 0 and global_steps % args.logging_steps == 0:
                    writer.add_scalar("lr", lr_scheduler.get_lr(),
                                      global_steps)
                    writer.add_scalar(
                        "loss",
                        (tr_loss - logging_loss) / args.logging_steps,
                        global_steps,
                    )
                    logger.info(
                        "global_steps {} - lr: {:.10f}  loss: {:.10f}".format(
                            global_steps,
                            lr_scheduler.get_lr(),
                            (tr_loss - logging_loss) / args.logging_steps,
                        ))
                    logging_loss = tr_loss

                if args.save_steps > 0 and global_steps % args.save_steps == 0:
                    logger.info("********** Running evaluating **********")
                    logger.info(f"********** Step {global_steps} **********")
                    output_dir = os.path.join(args.output_dir,
                                              f"step-{global_steps}")
                    os.makedirs(output_dir, exist_ok=True)

                    if args.task_name == "mnli":
                        matched_results = evaluate(
                            model,
                            dev_dataloader_match,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in matched_results.items():
                            writer.add_scalar(f"eval/matched_{k}", v,
                                              global_steps)
                            logger.info(f"  {k} = {v}")
                        mismatched_results = evaluate(
                            model,
                            dev_dataloader_mismatch,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in mismatched_results.items():
                            writer.add_scalar(f"eval/mismatched_{k}", v,
                                              global_steps)
                            logger.info(f"  {k} = {v}")
                    else:
                        eval_results = evaluate(
                            model,
                            dev_dataloader,
                            tokenizer,
                            label2id,
                            metric_list,
                            generate_max_length,
                        )
                        for k, v in eval_results.items():
                            writer.add_scalar(f"eval/{k}", v, global_steps)
                            logger.info(f"  {k} = {v}")
                    model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    logger.info("********** Evaluating Done **********")

            if global_steps >= args.max_train_steps:
                logger.info("********** Running evaluating **********")
                logger.info(f"********** Step {global_steps} **********")
                output_dir = os.path.join(args.output_dir,
                                          f"step-{global_steps}")
                os.makedirs(output_dir, exist_ok=True)

                if args.task_name == "mnli":
                    matched_results = evaluate(
                        model,
                        dev_dataloader_match,
                        tokenizer,
                        label2id,
                        metric_list,
                        generate_max_length,
                    )
                    for k, v in matched_results.items():
                        writer.add_scalar(f"eval/matched_{k}", v, global_steps)
                        logger.info(f"  {k} = {v}")
                    mismatched_results = evaluate(
                        model,
                        dev_dataloader_mismatch,
                        tokenizer,
                        label2id,
                        metric_list,
                        generate_max_length,
                    )
                    for k, v in mismatched_results.items():
                        writer.add_scalar(f"eval/mismatched_{k}", v,
                                          global_steps)
                        logger.info(f"  {k} = {v}")
                else:
                    eval_results = evaluate(
                        model,
                        dev_dataloader,
                        tokenizer,
                        label2id,
                        metric_list,
                        generate_max_length,
                    )
                    for k, v in eval_results.items():
                        writer.add_scalar(f"eval/{k}", v, global_steps)
                        logger.info(f"  {k} = {v}")
                model.save_pretrained(output_dir)
                tokenizer.save_pretrained(output_dir)
                logger.info("********** Evaluating Done **********")
                logger.info("********** Training Done **********")
                return
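Example #6 is PaddlePaddle code (note optimizer.clear_grad and scaler.minimize) and hands the full args namespace to get_writer. A plausible sketch, assuming a logdir attribute on args (the attribute name is an assumption):

from tensorboardX import SummaryWriter  # VisualDL's LogWriter is the usual Paddle alternative

def get_writer(args):
    # add_scalar(tag, value, step) matches the calls in the training loop above
    return SummaryWriter(logdir=args.logdir)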
Example #7
    def __init__(self, cfgs):

        save_dict = OrderedDict()

        save_dict["fold"] = cfgs["fold"]
        if cfgs["memo"] is not None:
            save_dict["memo"] = cfgs["memo"]  # 1,2,3
        specific_dir = ["{}-{}".format(key, save_dict[key]) for key in save_dict.keys()]

        cfgs["save_dir"] = os.path.join(
            cfgs["save_dir"],
            # cfgs["model"]["meta"],
            # cfgs["model"]["inputs"]["label"],
            "_".join(specific_dir),
        )
        os.makedirs(cfgs["save_dir"], exist_ok=True)

        ####### CONFIGS
        self.cfgs = cfgs

        ####### Logging
        self.tb_writer = utils.get_writer(self.cfgs)
        self.txt_logger = utils.get_logger(self.cfgs)

        self.do_logging = True
        if len(self.cfgs["gpu"]) > 1:
            if dist.get_rank() != 0:
                self.do_logging = False

        if self.do_logging:
            self.txt_logger.write("\n\n----train.py----")
            self.txt_logger.write("\n{}".format(datetime.datetime.now()))
            self.txt_logger.write(
                "\n\nSave Directory: \n{}".format(self.cfgs["save_dir"])
            )
            self.txt_logger.write("\n\nConfigs: \n{}\n".format(self.cfgs))

        ####### MODEL
        model = models.get_model(self.cfgs)
        if len(self.cfgs["gpu"]) > 1:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            self.device = torch.device("cuda:{}".format(self.cfgs["local_rank"]))
            self.model = model.to(self.device)
            self.model = DistributedDataParallel(
                self.model,
                device_ids=[self.cfgs["local_rank"]],
                output_device=self.cfgs["local_rank"],
            )
        else:
            self.device = torch.device("cuda:{}".format(self.cfgs["local_rank"]))
            self.model = model.to(self.device)

        ####### Data

        train_dataset = inputs.get_dataset(self.cfgs, mode="train")
        if len(self.cfgs["gpu"]) > 1:
            train_sampler = DistributedSampler(
                train_dataset,
                num_replicas=len(self.cfgs["gpu"]),
                rank=self.cfgs["local_rank"],
            )
        else:
            train_sampler = None

        self.train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=self.cfgs["batch_size"],
            num_workers=self.cfgs["num_workers"],
            pin_memory=True,
            drop_last=False,
            collate_fn=inputs.get_collater(),
            sampler=train_sampler,
        )

        # if self.do_logging:
        #     self.txt_logger.write("\nDataset: ")
        #     self.txt_logger.write(
        #         "\nTRAIN Abnormal/Normal: {}/{}".format(
        #             len(train_dataset.abnormal_meta_df),
        #             len(train_dataset.normal_meta_df),
        #         )
        #     )

        ####### Opts
        self.optimizer = opts.get_optimizer(self.cfgs, self.model.parameters())
        self.scheduler = opts.get_scheduler(self.cfgs, self.optimizer)
        self.grad_scaler = GradScaler(enabled=self.cfgs["use_amp"])

        ####### Validator
        self.validator = Validator(self.cfgs, self.device)
Example #8
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device", type=int, help="gpu id")
parser.add_argument("-n", "--log", type=str, help="name of log folder")
parser.add_argument("-p", "--hparams", type=str, help="hparams config file")
opts = parser.parse_args()

# Get CUDA/CPU device
device = get_device(opts.device)

print('Loading data..')
hparams = load_json('./configs', opts.hparams)
dataset_a, dataset_b = get_datasets(**hparams['dataset'])
loader_a = DataLoader(dataset_a, **hparams['loading'])
loader_b = DataLoader(dataset_b, **hparams['loading'])
model = TravelGAN(hparams['model'], device=device)
writer, monitor = get_writer(opts.log)

print('Start training..')
for epoch in range(hparams['n_epochs']):
    # Run one epoch
    dis_losses, gen_losses = [], []
    for x_a, x_b in zip(loader_a, loader_b):
        # Loading on device
        x_a = x_a.to(device, non_blocking=True)
        x_b = x_b.to(device, non_blocking=True)

        # Calculate losses and update weights
        dis_loss = model.dis_update(x_a, x_b)
        gen_loss = model.gen_update(x_a, x_b)
        dis_losses.append(dis_loss)
        gen_losses.append(gen_loss)
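The snippet is cut off mid-loop; an epoch would typically end by averaging the collected losses and sending them to the writer. A hypothetical continuation, not part of the original source:

    # Hypothetical epoch-end logging, assuming a TensorBoard-style writer
    writer.add_scalar('loss/dis', sum(dis_losses) / len(dis_losses), epoch)
    writer.add_scalar('loss/gen', sum(gen_losses) / len(gen_losses), epoch)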