コード例 #1
0
    def prepare_dirs(self, hparams):
        """Create the log/model directories for this run and persist hparams.

        Builds a timestamped model name, creates the log and model
        directories when missing, then saves *hparams* and a copy of
        ``hparams.py`` into the model directory.
        """
        cfg = self.config
        cfg.model_name = "{}_{}".format('NER', get_time())
        cfg.model_dir = os.path.join(cfg.log_dir, cfg.model_name)

        for directory in (cfg.log_dir, cfg.model_dir):
            if not os.path.exists(directory):
                os.makedirs(directory)

        save_hparams(cfg.model_dir, hparams)
        copy_file("hparams.py", os.path.join(cfg.model_dir, "hparams.py"))
コード例 #2
0
ファイル: main.py プロジェクト: efikarra/hierarchical-rnn-tf
def process_or_load_hparams(out_dir, default_hparams, hparams_path):
    """Build the effective hparams for a run.

    Starts from *default_hparams*, optionally overridden by the hparams
    file at *hparams_path*, extends and validates them, persists the
    result to *out_dir*, prints it, and returns it.
    """
    # Defaults, possibly overridden by an hparams file given on the CLI.
    hparams = utils.maybe_parse_standard_hparams(default_hparams, hparams_path)
    # Add the extra parameters the training loop needs.
    hparams = process_hparams(hparams)
    # Fail fast on incompatible settings.
    check_hparams(hparams)
    # Persist alongside the run outputs.
    utils.save_hparams(out_dir, hparams)

    print("Print hyperparameters:")
    utils.print_hparams(hparams)
    return hparams
コード例 #3
0
def _external_eval(model, global_step, sess, hparams, iterator,
                   iterator_feed_dict, tgt_file, lbl_file, label,
                   summary_writer, save_on_best):
    """External evaluation such as BLEU and ROUGE scores.

    Decodes the dataset behind *iterator*, scores the output against
    *tgt_file*/*lbl_file* via ``nmt_utils.decode_and_evaluate``, logs each
    metric to TensorBoard, and — when *save_on_best* — checkpoints the
    model whenever a metric improves. Returns the metric-name -> score dict.
    """
    out_dir = hparams.out_dir
    # Skip decoding (and summary/checkpoint writing) before any training step.
    decode = global_step > 0

    if decode:
        utils.print_out("# External evaluation, global step %d" % global_step)

    # Rewind the evaluation dataset.
    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    # Decoded outputs go into the run directory, one file per sub-task.
    slot_output = os.path.join(out_dir, "slot_output_%s" % label)
    intent_output = os.path.join(out_dir, "intent_output_%s" % label)
    scores = nmt_utils.decode_and_evaluate(
        label,
        model,
        sess,
        slot_output,
        intent_output,
        ref_file=tgt_file,
        ref_lbl_file=lbl_file,
        metrics=hparams.metrics,
        subword_option=hparams.subword_option,
        beam_width=hparams.beam_width,
        tgt_eos=hparams.eos,
        task=hparams.task,
        decode=decode,
        infer_mode=hparams.infer_mode)
    # Save on best metrics
    if decode:
        for metric in hparams.metrics:
            best_metric_label = "best_" + metric

            utils.add_summary(summary_writer, global_step,
                              "%s_%s" % (label, metric), scores[metric])
            # metric: larger is better
            if save_on_best and scores[metric] > getattr(
                    hparams, best_metric_label):
                # New best: remember the score on hparams and checkpoint
                # into the metric-specific "best_<metric>_dir" directory.
                setattr(hparams, best_metric_label, scores[metric])
                model.saver.save(sess,
                                 os.path.join(
                                     getattr(hparams,
                                             best_metric_label + "_dir"),
                                     "translate.ckpt"),
                                 global_step=model.global_step)
        # Persist the updated best-metric values for later resumption.
        utils.save_hparams(out_dir, hparams)
    return scores
コード例 #4
0
ファイル: main.py プロジェクト: efikarra/vae
def create_or_load_hparams(out_dir, default_hparams, flags):
    """Load hparams from *out_dir* when present, otherwise build them
    from *default_hparams*; then save, print, and return them."""
    loaded = utils.load_hparams(out_dir)
    if loaded:
        # Reconcile previously saved hparams with the current command line.
        hparams = utils.ensure_compatible_hparams(loaded, default_hparams,
                                                  flags)
    else:
        # Fresh run: defaults -> optional standard-hparams file override,
        # then derive the flattened input dimension from the image size.
        hparams = utils.maybe_parse_standard_hparams(default_hparams,
                                                     flags.hparams_path)
        hparams.add_hparam("x_dim", hparams.img_width * hparams.img_height)

    # Persist and echo the final configuration.
    utils.save_hparams(out_dir, hparams)
    utils.print_hparams(hparams)
    return hparams
コード例 #5
0
def create_or_load_hparams(out_dir, default_hparams, flags):
    """Return run hparams: reuse a saved set from *out_dir* when one
    exists, otherwise derive a fresh set from *default_hparams*."""
    hparams = utils.load_hparams(out_dir)
    if hparams:
        # A saved hparams file exists: make it compatible with the command
        # line hparams (on conflict the command line values win!).
        hparams = utils.ensure_compatible_hparams(hparams, default_hparams,
                                                  flags)
    else:
        # No saved hparams: defaults -> optional standard file -> extension.
        hparams = utils.maybe_parse_standard_hparams(default_hparams,
                                                     flags.hparams_path)
        hparams = extend_hparams(hparams)

    # Persist and echo the final configuration.
    utils.save_hparams(out_dir, hparams)
    print("Print hyperparameters:")
    utils.print_hparams(hparams)
    return hparams
コード例 #6
0
def create_or_load_hparams(
    out_dir, default_hparams, hparams_path, save_hparams=True):
  """Load hparams from out_dir if present, else build from defaults.

  The result always passes through extend_hparams; when save_hparams is
  True it is persisted to out_dir and to every best-metric directory.
  """
  hparams = utils.load_hparams(out_dir)
  if hparams:
    hparams = ensure_compatible_hparams(hparams, default_hparams, hparams_path)
  else:
    hparams = utils.maybe_parse_standard_hparams(default_hparams, hparams_path)
  hparams = extend_hparams(hparams)

  if save_hparams:
    utils.save_hparams(out_dir, hparams)
    # Mirror hparams into each "best_<metric>_dir" checkpoint directory.
    for metric in hparams.metrics:
      best_dir = getattr(hparams, "best_" + metric + "_dir")
      utils.save_hparams(best_dir, hparams)

  utils.print_hparams(hparams)
  return hparams
コード例 #7
0
    def initialize(self,
                   logdir=None,
                   coolname=False,
                   hparams=None,
                   tensorboard=False,
                   no_timestamp=False,
                   global_rank=0,
                   eager_flush=True):
        '''
        Initialize logx

        inputs
        - logdir - where to write logfiles
        - tensorboard - whether to write to tensorboard file
        - global_rank - must set this if using distributed training, so we only
          log from rank 0
        - coolname - generate a unique directory name underneath logdir, else
          use logdir as output directory
        - hparams - only use if not launching jobs with runx, which also saves
          the hparams.
        - eager_flush - call `flush` after every tensorboard write
        - no_timestamp - omit the timestamp column from metrics.csv
          (useful for testing)
        '''
        # Only rank 0 writes logs/checkpoints in distributed runs.
        self.rank0 = (global_rank == 0)
        self.initialized = True

        if logdir is not None:
            self.logdir = logdir
        else:
            # No explicit logdir: derive one under the configured log root.
            logroot = get_logroot()
            if coolname:
                # Lazy import: coolname is only needed for this option.
                from coolname import generate_slug
                self.logdir = os.path.join(logroot, generate_slug(2))
            else:
                self.logdir = os.path.join(logroot, 'default')

        # confirm target log directory exists
        if not os.path.isdir(self.logdir):
            os.makedirs(self.logdir, exist_ok=True)

        if hparams is not None and self.rank0:
            save_hparams(hparams, self.logdir)

        # Tensorboard file
        if self.rank0 and tensorboard:
            self.tb_writer = SummaryWriter(log_dir=self.logdir, flush_secs=1)
        else:
            self.tb_writer = None

        self.eager_flush = eager_flush

        # This allows us to use the tensorboard with automatic checking of both
        # the `tensorboard` condition, as well as ensuring writes only happen
        # on rank0. Any function supported by `SummaryWriter` is supported by
        # `ConditionalProxy`. Additionally, flush will be called after any call
        # to this.
        self.tensorboard = ConditionalProxy(
            self.tb_writer,
            tensorboard and self.rank0,
            post_hook=self._flush_tensorboard,
        )

        # Non-rank0 processes stop here: no metrics/log files for them.
        if not self.rank0:
            return

        # Metrics file (append mode, so restarts keep the prior history)
        metrics_fn = os.path.join(self.logdir, 'metrics.csv')
        self.metrics_fp = open(metrics_fn, mode='a+')
        self.metrics_writer = csv.writer(self.metrics_fp, delimiter=',')

        # Log file (also append mode)
        log_fn = os.path.join(self.logdir, 'logging.log')
        self.log_file = open(log_fn, mode='a+')

        # save metric
        self.save_metric = None
        self.best_metric = None
        self.save_ckpt_fn = ''
        # Find the existing best checkpoint, and update `best_metric`,
        # if available
        self.best_ckpt_fn = self.get_best_checkpoint() or ''
        if self.best_ckpt_fn:
            # Load on CPU: we only need the stored scalar, not GPU tensors.
            best_chk = torch.load(self.best_ckpt_fn, map_location='cpu')
            self.best_metric = best_chk.get('__metric', None)
        self.epoch = defaultdict(lambda: 0)
        self.no_timestamp = no_timestamp

        # Initial timestamp, so that epoch time calculation is correct
        phase = 'start'
        csv_line = [phase]

        # add epoch/iter
        csv_line.append('{}/step'.format(phase))
        csv_line.append(0)

        # add timestamp
        if not self.no_timestamp:
            # this feature is useful for testing
            csv_line.append('timestamp')
            csv_line.append(time.time())

        self.metrics_writer.writerow(csv_line)
        self.metrics_fp.flush()
コード例 #8
0
# Training script entry: parse hyperparameters, then build train/eval
# input pipelines and a shared (reinitializable) dataset iterator.
from calc_rouge import calc_rouge
import os
from hparams import Hparams
import math
import logging

logging.basicConfig(level=logging.INFO)
# Pin CUDA device ordering to the PCI bus and restrict this process to GPU 0.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


logging.info("# hparams")
# Parse command-line hyperparameters and persist them to the model directory.
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
# NOTE(review): save_hparams, get_batch and tf are used below but not
# imported in this visible snippet — presumably imported elsewhere in the
# file; verify before relying on this excerpt standalone.
save_hparams(hp, hp.modeldir)

logging.info("# Prepare train/eval batches")
# Training batches are shuffled; evaluation batches keep file order.
train_batches, num_train_batches, num_train_samples = get_batch(hp.train_source, hp.train_target,
                                             hp.maxlen_source, hp.maxlen_target,
                                             hp.vocab, hp.batch_size,
                                             shuffle=True)
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval_source, hp.eval_target,
                                             hp.maxlen_source, hp.maxlen_target,
                                             hp.vocab, hp.eval_batch_size,
                                             shuffle=False)

# create a iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_batches.output_types, train_batches.output_shapes)
xs, ys = iter.get_next()
コード例 #9
0
def train(conf, project_dir: Path, run_dir: Path) -> torch.nn.Module:
    """Train a neural-process-style model on the dataset named by ``conf.dataset``.

    Supported datasets: 'sine', 'sinefreq', 'noisysine' (MLPModel on 1-D
    time-series) and 'rotnist' (ConvNetModel on rotating MNIST).  Metrics
    and example plots are logged to TensorBoard under *run_dir*; model
    checkpoints are saved there every ``conf.checkpoint_freq`` epochs and
    once at the end.  Returns the trained model.
    """
    writer = SummaryWriter(str(run_dir))
    save_hparams(conf, writer)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    if conf.dataset == 'sine':
        # dataset of time-series
        dataset_train = SineData()
        dataloader_train = DataLoader(dataset_train,
                                      batch_size=conf.batch_size,
                                      drop_last=True)
        dataset_test = SineData()
        dataloader_test = DataLoader(dataset_test,
                                     batch_size=conf.batch_size,
                                     shuffle=False,
                                     drop_last=True)

        h_sizes = OmegaConf.to_container(
            conf.hidden_sizes)  # OmegaConf object to list
        model = MLPModel(dim_y=1,
                         dim_r=conf.dim_r,
                         dim_z_prime=conf.dim_z_prime,
                         dim_l=conf.dim_l,
                         hidden_sizes_encoder=h_sizes,
                         hidden_sizes_ode_net=h_sizes,
                         hidden_sizes_decoder=h_sizes,
                         t0=dataset_train.t0,
                         device=device)
    elif conf.dataset == 'sinefreq':
        # dataset of frequency varying sinus time-series
        dataset_train = FreqSineData(amplitude_range=(0.5, 1.),
                                     shift_range=(-.5, .5),
                                     freq_range=(1.0, 2.0),
                                     num_samples=5000)
        dataloader_train = DataLoader(dataset_train,
                                      batch_size=conf.batch_size,
                                      drop_last=True)
        dataset_test = FreqSineData(amplitude_range=(0.5, 1.),
                                    shift_range=(-.5, .5),
                                    freq_range=(1.0, 2.0),
                                    num_samples=1000)
        dataloader_test = DataLoader(dataset_test,
                                     batch_size=conf.batch_size,
                                     shuffle=False,
                                     drop_last=True)

        h_sizes = OmegaConf.to_container(
            conf.hidden_sizes)  # OmegaConf object to list
        model = MLPModel(dim_y=1,
                         dim_r=conf.dim_r,
                         dim_z_prime=conf.dim_z_prime,
                         dim_l=conf.dim_l,
                         hidden_sizes_encoder=h_sizes,
                         hidden_sizes_ode_net=h_sizes,
                         hidden_sizes_decoder=h_sizes,
                         t0=dataset_train.t0,
                         device=device)
    elif conf.dataset == 'noisysine':
        sigma = conf.sigma
        # dataset of noisy sinus time-series
        dataset_train = NoisySineData(sigma,
                                      shift_range=(-0.1, .1),
                                      freq_range=(1.9, 2.0),
                                      num_samples=1000)
        dataloader_train = DataLoader(dataset_train,
                                      batch_size=conf.batch_size,
                                      drop_last=True)
        dataset_test = NoisySineData(sigma,
                                     shift_range=(-0.1, .1),
                                     freq_range=(1.9, 2.0),
                                     num_samples=1000)
        dataloader_test = DataLoader(dataset_test,
                                     batch_size=conf.batch_size,
                                     shuffle=False,
                                     drop_last=True)

        h_sizes = OmegaConf.to_container(
            conf.hidden_sizes)  # OmegaConf object to list
        model = MLPModel(dim_y=1,
                         dim_r=conf.dim_r,
                         dim_z_prime=conf.dim_z_prime,
                         dim_l=conf.dim_l,
                         hidden_sizes_encoder=h_sizes,
                         hidden_sizes_ode_net=h_sizes,
                         hidden_sizes_decoder=h_sizes,
                         t0=dataset_train.t0,
                         device=device)
    elif conf.dataset == 'rotnist':
        # dataset of Rotating MNIST (in the literature)
        dataset_mnist = RotNISTDataset(data_dir=str(project_dir / 'data'))
        # Hold out the last len_test samples for testing.
        len_test = 10
        dataset_train = dataset_mnist[:len(dataset_mnist) - len_test]
        dataset_test = dataset_mnist[len(dataset_mnist) - len_test:]
        dataloader_train = DataLoader(dataset_train,
                                      batch_size=conf.batch_size,
                                      drop_last=True)
        dataloader_test = DataLoader(dataset_test,
                                     batch_size=conf.batch_size,
                                     shuffle=False,
                                     drop_last=True)
        h_sizes = OmegaConf.to_container(conf.hidden_sizes)
        model = ConvNetModel(dim_r=conf.dim_r,
                             dim_z_prime=conf.dim_z_prime,
                             dim_l=conf.dim_l,
                             hidden_sizes_ode_net=h_sizes,
                             t0=dataset_mnist.t0,
                             device=device)

    else:
        raise ValueError(f'Dataset {conf.dataset} not recognized')

    model = model.to(device)
    optimizer = torch.optim.RMSprop(model.parameters(), lr=conf.lr)

    context_range = OmegaConf.to_container(conf.context_range)
    extra_target_range = OmegaConf.to_container(conf.extra_target_range)
    global_train_step = 0
    global_test_step = 0
    for epoch in tqdm(range(conf.epochs)):
        mse_train_list = []
        mse_test_list = []

        # Evaluation pass first (no gradients), then the training pass.
        with torch.no_grad():
            for step, (t, y) in enumerate(dataloader_test):
                t, y = t.to(device), y.to(device)
                t_context, y_context, t_extra, y_extra, _, _ = get_split(
                    t, y, test_context_size=conf.test_context_size)

                p_y, _, _ = model(
                    t_context, y_context, t_extra
                )  # for testing, we only need predictions at t_extra
                output = p_y.loc
                mse_test = F.mse_loss(output, y_extra)

                # log test results
                writer.add_scalar('mse_test', mse_test.item(),
                                  global_test_step)
                mse_test_list.append(mse_test.item())
                # Plot the first test batch every other epoch.
                if step == 0 and epoch % 2 == 0:
                    if conf.dataset in ['sine', 'sinefreq', 'noisysine']:
                        log_sine_plot(writer, model, t, y, t_context,
                                      y_context, t_extra, epoch)
                    elif conf.dataset == 'rotnist':
                        log_rotnist_plot2(writer, model, t, y, epoch, 'test')
                global_test_step += 1

        for (t, y) in dataloader_train:
            t, y = t.to(device), y.to(device)
            (t_context, y_context, t_extra, y_extra, t_target,
             y_target) = get_split(t,
                                   y,
                                   context_range=context_range,
                                   extra_target_range=extra_target_range)

            p_y, q_z_T, q_z_C = model(t_context,
                                      y_context,
                                      t_target,
                                      y_target=y_target)
            log_p = p_y.log_prob(y_target).sum(dim=(1, 2)).mean(
                dim=0)  # mean on batch dim, sum on time dim/y dim

            output = p_y.loc
            mse_train = F.mse_loss(output, y_target)
            # mean on batch dim, sum on z dim (equivalent to kl_div of the multivariate normal)
            kl_div = kl_divergence(q_z_C, q_z_T).sum(dim=1).mean(dim=0)
            # ELBO-style loss: negative log-likelihood plus KL regularizer.
            loss = -log_p + kl_div

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # log training metrics
            writer.add_scalar('kl_div', kl_div.item(), global_train_step)
            writer.add_scalar('log_p', log_p.item(), global_train_step)
            writer.add_scalar('train_loss', loss.item(), global_train_step)
            writer.add_scalar('mse_train', mse_train.item(), global_train_step)
            mse_train_list.append(mse_train.item())
            global_train_step += 1

        # log test/train mse epoch-wise to match the paper's figures
        writer.add_scalar('mse_train_epoch', np.mean(mse_train_list), epoch)
        writer.add_scalar('mse_test_epoch', np.mean(mse_test_list), epoch)
        if epoch % conf.checkpoint_freq == 0 and epoch > 0:
            torch.save(model.state_dict(), run_dir / f'model_ep{epoch}.pth')

    # Final checkpoint after the last epoch.
    torch.save(model.state_dict(), run_dir / f'model.pth')
    return model
コード例 #10
0
def main():
    """Train a segmentation network (FCN or DeepLabV3) fully supervised.

    Parses the command line, builds the train/validation dataloaders
    (VOC+SBD — optionally plus COCO — or the Landcover dataset), creates
    the model, and launches training via ``ev.train_fully_supervised``.
    """
    #torch.manual_seed(42)

    # ------------
    # args
    # ------------
    parser = ArgumentParser()

    # Learning parameters
    parser.add_argument('--auto_lr', type=U.str2bool, default=False,help="Auto lr finder")
    parser.add_argument('--learning_rate', type=float, default=10e-4)
    parser.add_argument('--scheduler', type=U.str2bool, default=False)
    parser.add_argument('--wd', type=float, default=2e-4)
    parser.add_argument('--moment', type=float, default=0.9)
    parser.add_argument('--batch_size', default=5, type=int)
    parser.add_argument('--n_epochs', default=10, type=int)
    parser.add_argument('--iter_every', default=1, type=int,help="Accumulate compute graph for iter_size step")
    parser.add_argument('--benchmark', default=False, type=U.str2bool, help="enable or disable backends.cudnn")

    # Model and eval
    parser.add_argument('--model', default='FCN', type=str,help="FCN or DLV3 model")
    parser.add_argument('--pretrained', default=False, type=U.str2bool,help="Use pretrained pytorch model")
    parser.add_argument('--eval_angle', default=True, type=U.str2bool,help=\
        "If true, it'll eval the model with different angle input size")


    # Data augmentation
    parser.add_argument('--rotate', default=False, type=U.str2bool,help="Use random rotation as data augmentation")
    parser.add_argument('--pi_rotate', default=True, type=U.str2bool,help="Use only pi/2 rotation angle")
    parser.add_argument('--p_rotate', default=0.25, type=float,help="Probability of rotating the image during the training")
    parser.add_argument('--scale', default=True, type=U.str2bool,help="Use scale as data augmentation")
    parser.add_argument('--landcover', default=False, type=U.str2bool,\
         help="Use Landcover dataset instead of VOC and COCO")
    parser.add_argument('--size_img', default=520, type=int,help="Size of input images")
    parser.add_argument('--size_crop', default=480, type=int,help="Size of crop image during training")
    parser.add_argument('--angle_max', default=360, type=int,help="Angle max for data augmentation")

    # Dataloader and gpu
    parser.add_argument('--nw', default=0, type=int,help="Num workers for the data loader")
    parser.add_argument('--pm', default=True, type=U.str2bool,help="Pin memory for the dataloader")
    parser.add_argument('--gpu', default=0, type=int,help="Wich gpu to select for training")

    # Datasets
    parser.add_argument('--split', default=False, type=U.str2bool, help="Split the dataset")
    parser.add_argument('--split_ratio', default=0.3, type=float, help="Amount of data we used for training")
    parser.add_argument('--dataroot_voc', default='/data/voc2012', type=str)
    parser.add_argument('--dataroot_sbd', default='/data/sbd', type=str)
    parser.add_argument('--dataroot_landcover', default='/share/DEEPLEARNING/datasets/landcover', type=str)
    # BUGFIX: args.extra_coco and args.dataroot_coco are read in the VOC/SBD
    # branch below but were never declared, raising AttributeError whenever
    # --landcover was False. Default False keeps the VOC+SBD-only behaviour.
    parser.add_argument('--extra_coco', default=False, type=U.str2bool,\
         help="Concatenate extra COCO data to the VOC+SBD training set")
    parser.add_argument('--dataroot_coco', default='/data/coco', type=str)

    # Save parameters
    parser.add_argument('--model_name', type=str,help="what name to use for saving")
    parser.add_argument('--save_dir', default='/data/save_model', type=str)
    parser.add_argument('--save_all_ep', default=False, type=U.str2bool,help=\
        "If true it'll save the model every epoch in save_dir")
    parser.add_argument('--save_best', default=False, type=U.str2bool,help="If true will only save the best epoch model")
    args = parser.parse_args()

    # ------------
    # device
    # ------------
    device = torch.device("cuda:"+str(args.gpu) if torch.cuda.is_available() else "cpu")
    print("device used:",device)

    # ------------
    # data
    # ------------
    if args.size_img < args.size_crop:
        raise Exception('Cannot have size of input images less than size of crop')
    size_img = (args.size_img,args.size_img)
    size_crop = (args.size_crop,args.size_crop)
    if not args.landcover:
        train_dataset_VOC = mdset.VOCSegmentation(args.dataroot_voc,year='2012', image_set='train', \
            download=True,rotate=args.rotate,size_img=size_img,size_crop=size_crop)
        test_dataset = mdset.VOCSegmentation(args.dataroot_voc,year='2012', image_set='val', download=True)
        train_dataset_SBD = mdset.SBDataset(args.dataroot_sbd, image_set='train_noval',mode='segmentation',\
            rotate=args.rotate,size_img=size_img,size_crop=size_crop)
        #COCO dataset
        if args.extra_coco:
            extra_COCO = cu.get_coco(args.dataroot_coco,'train',rotate=args.rotate,size_img=size_img,size_crop=size_crop)
            # Concatene dataset
            train_dataset = tud.ConcatDataset([train_dataset_VOC,train_dataset_SBD,extra_COCO])
        else:
            train_dataset = tud.ConcatDataset([train_dataset_VOC,train_dataset_SBD])
        num_classes = 21
    else:
        print('Loading Landscape Dataset')
        train_dataset = mdset.LandscapeDataset(args.dataroot_landcover,image_set="trainval",\
            rotate=args.rotate,pi_rotate=args.pi_rotate,p_rotate=args.p_rotate,size_img=size_img,size_crop=size_crop,angle_max=args.angle_max)
        test_dataset = mdset.LandscapeDataset(args.dataroot_landcover,image_set="test")
        print('Success load Landscape Dataset')
        num_classes = 4

    # Optionally train on only a fraction of the training data.
    if args.split:
        train_dataset = U.split_dataset(train_dataset,args.split_ratio)
    # Print len datasets
    print("There is",len(train_dataset),"images for training and",len(test_dataset),"for validation")
    dataloader_train = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,num_workers=args.nw,\
        pin_memory=args.pm,shuffle=True,drop_last=True)#,collate_fn=U.my_collate)
    dataloader_val = torch.utils.data.DataLoader(test_dataset,num_workers=args.nw,pin_memory=args.pm,\
        batch_size=args.batch_size)


    # ------------
    # model
    # ------------

    if args.model.upper()=='FCN':
        model = models.segmentation.fcn_resnet101(pretrained=args.pretrained,num_classes=num_classes)
    elif args.model.upper()=='DLV3':
        model = models.segmentation.deeplabv3_resnet101(pretrained=args.pretrained,num_classes=num_classes)
    else:
        raise Exception('model must be "FCN" or "DLV3"')
    #model.to(device)


    # ------------
    # save
    # ------------
    save_dir = U.create_save_directory(args.save_dir)
    print('model will be saved in',save_dir)
    U.save_hparams(args,save_dir)

    # ------------
    # training
    # ------------
    # Auto lr finding
    print(args)

    criterion = nn.CrossEntropyLoss(ignore_index=num_classes) # Ignore the border class.
    torch.autograd.set_detect_anomaly(True)
    optimizer = torch.optim.SGD(model.parameters(),lr=args.learning_rate,momentum=args.moment,weight_decay=args.wd)

    ev.train_fully_supervised(model=model,n_epochs=args.n_epochs,train_loader=dataloader_train,val_loader=dataloader_val,\
        criterion=criterion,optimizer=optimizer,save_folder=save_dir,scheduler=args.scheduler,auto_lr=args.auto_lr,\
            model_name=args.model_name,benchmark=args.benchmark, save_best=args.save_best,save_all_ep=args.save_all_ep,\
                device=device,num_classes=num_classes)
コード例 #11
0
import torch.nn as nn
import torch.optim as optim
from data_preprocessing import train_set, train_loader, val_loader
from LeNet import LeNet
from utils import view_bar
from sklearn.metrics import accuracy_score
from validation import validation
import time

# load the hyper-parameters
logging.basicConfig(level=logging.INFO)
logging.info("# Loading hyperparameters")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.train_dir)

# identify the device to use
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logging.info("Using %s" % DEVICE)

# instantiate the model, the loss function and optimizer
model = LeNet()
xentropy = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),
                      lr=hp.lr,
                      momentum=0.9)

# check the latest checkpoint to resume training
ckpt_path = latest_ckpt(hp.ckpt)
if ckpt_path is None:
コード例 #12
0
ファイル: evaluate.py プロジェクト: zhaoxlpku/KnowledGPT
def main(args):
    """Evaluate a knowledge-grounded dialogue pipeline on seen/unseen test sets.

    Loads the knowledge-selection (dis) and response-generation (gen)
    models, decodes both test splits, writes the decoded hypotheses to
    disk, and prints perplexity, BLEU, Distinct and F1 scores.
    """
    print("\nParameters:")
    for attr, value in sorted(vars(args).items()):
        print("{}={}".format(attr.upper(), value))
    print("")

    # Selecting wihch GPU to use
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_list
    args.cuda = torch.cuda.is_available() and not args.no_cuda

    # Output directory for models and summaries
    out_dir = os.path.join(args.log, args.exp_name)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    print('Writing to {}\n'.format(out_dir))
    save_hparams(args, os.path.join(out_dir, 'hparams'))

    # Checkpoint directory
    checkpoint_dir = os.path.join(out_dir, 'checkpoints')
    checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Build dataset
    time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("Create training dataset begain... | %s " % time_str)

    # max_knowledge=999 effectively disables knowledge truncation for eval.
    test_seen_dataset = KGDataset(args.test_seen_file, max_knowledge=999)
    test_unseen_dataset = KGDataset(args.test_unseen_file, max_knowledge=999)

    test_seen_loader = get_batch_loader(test_seen_dataset,
                                        collate_fn=collate_fn,
                                        batch_size=args.eval_batch_size,
                                        is_test=True)
    test_unseen_loader = get_batch_loader(test_unseen_dataset,
                                          collate_fn=collate_fn,
                                          batch_size=args.eval_batch_size,
                                          is_test=True)

    time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("Create training dataset end... | %s " % time_str)

    # Batcher: tokenization/packing for the selector (BERT) and generator (GPT-2).
    dis_batcher = DisBatcher(args.bert_truncate, args.bert_config, args.cuda)
    gen_batcher = GenBatcher(args.knowledge_truncate, args.text_truncate,
                             args.gpt2_truncate, args.gpt2_config, args.cuda)

    # Load model
    dis_model = load_dis_net(args.emb_dim, args.lstm_hidden, args.lstm_layer,
                             args.bert_config, args.dis_pretrain_file,
                             args.load_dis, args.cuda)
    gen_model = load_gen_net(gen_batcher.tokenizer, args.segment,
                             args.gpt2_config, args.gen_pretrain_file,
                             args.load_gen, args.cuda)

    # reduce=False keeps per-token losses so we can compute corpus perplexity.
    # NOTE(review): `reduce=` is a deprecated alias of `reduction='none'` in
    # recent PyTorch — confirm the pinned torch version still accepts it.
    ce = lambda logit, target: F.cross_entropy(logit, target, reduce=False)
    gen_criterion = lambda logits, targets: sequence_loss(
        logits, targets, ce, pad_idx=-1)

    def dev_step(split, global_step):
        # Evaluate one split end-to-end; returns a dict of metrics.

        if split == 'test_seen':
            test_loader = test_seen_loader
        elif split == 'test_unseen':
            test_loader = test_unseen_loader
        else:
            raise ValueError

        dis_model.eval()
        gen_model.eval()

        n_token, test_loss = 0, 0.0  # ppl
        test_hyp, test_ref = [], []
        count = 0

        with torch.no_grad():
            for knowledges, histories, users, responses, knowledge_lens in test_loader:
                # Knowledge sentences / history turns are '\n\n'-joined strings.
                knowledges = [know.split('\n\n') for know in knowledges]
                histories = [his.split('\n\n') for his in histories]

                # Pick the single best knowledge sentence per example.
                dis_args = dis_batcher(knowledges, histories, knowledge_lens,
                                       args.n_sent)
                dis_out = dis_model(*dis_args)
                dis_knowledges = [[knowledges[bi][dis_out[0][bi].item()]]
                                  for bi in range(len(knowledges))]

                # Teacher-forced pass to accumulate token-level loss for ppl.
                gen_args = gen_batcher(dis_knowledges, histories, users,
                                       responses, args.segment, True)
                loss = gen_criterion(
                    gen_model(gen_args[0], token_type_ids=gen_args[1])[0],
                    gen_args[2])
                n_token += loss.size(0)
                test_loss += loss.sum().item()

                # Free-running decoding, one example at a time.
                for bi in range(len(dis_knowledges)):
                    dec_in = gen_batcher(dis_knowledges[bi:bi + 1],
                                         histories[bi:bi + 1],
                                         users[bi:bi + 1],
                                         segment=args.segment,
                                         training=False)
                    dec_out = gen_model.batch_decode(
                        dec_in, args.max_length, args.min_length,
                        args.early_stopping, args.beam_size,
                        args.repetition_penalty, gen_batcher.eos_id,
                        args.length_penalty, args.no_repeat_ngram_size)
                    # Strip the prompt: keep only newly generated tokens.
                    dec_out = dec_out[0].tolist()[dec_in.size(1):]
                    _hyp = gen_batcher.tokenizer.decode(
                        dec_out,
                        skip_special_tokens=True,
                        clean_up_tokenization_spaces=False)
                    _ref = responses[bi]
                    test_hyp.append(_hyp)
                    test_ref.append(_ref)

                    count += 1
                    if count % 1000 == 0:
                        print(count)

        # Persist "hypothesis ||| reference" pairs for later inspection.
        with open(
                os.path.join(
                    out_dir,
                    '{}-decoded-iter-{}.txt'.format(split, global_step)),
                'w') as f:
            for _hyp, _ref in zip(test_hyp, test_ref):
                f.writelines('{} ||| {}\n'.format(_hyp, _ref))

        MeanLoss = test_loss / n_token
        b1, b2, b3, b4 = bleu_metric(test_hyp, test_ref)
        d1, d2 = distinct_metric(test_hyp)
        f1 = f1_metric(test_hyp, test_ref)

        time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("**********************************")
        print("{} results..........".format(split))
        print('hypothesis: ', len(test_hyp))
        print("Step: %d \t| ppl: %.3f \t|  %s" %
              (global_step, math.exp(MeanLoss), time_str))
        print("BLEU-1/2/3/4: {:.4f}/{:.4f}/{:.4f}/{:.4f}".format(
            b1, b2, b3, b4))
        print("Distinct-1/2: {:.4f}/{:.4f}".format(d1, d2))
        print("F1: {:.4f}".format(f1))
        print("**********************************")

        return {
            'f1': f1,
            'loss': MeanLoss,
            'bleu1': b1,
            'bleu2': b2,
            'bleu3': b3,
            'bleu4': b4,
            'distinct1': d1,
            'distinct2': d2
        }

    dev_step("test_seen", 0)  # test_random_split
    dev_step("test_unseen", 0)  # test_topic_split
コード例 #13
0
ファイル: test_PAGE.py プロジェクト: zixufang/BERT-T2T
from bert_transformer_vae_for_PAGE import VaeModel

from data_load import get_batch_for_train_or_dev_or_test,saveForTfRecord
from utils import save_hparams, get_hypotheses
import os
from hparams import Hparams
import logging
# Pin the whole process to GPU 5 (must happen before any CUDA init).
os.environ['CUDA_VISIBLE_DEVICES']= '5'
logging.basicConfig(level=logging.INFO)


# Parse the command-line hyperparameters and persist them to the PAGE
# output directory so the run is reproducible.
logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.PAGEdir)


# The four identical log lines below translate to: "Reminder (Xu Haiming):
# the tfRecord files must be prepared here" -- emphasized deliberately.
logging.info("# 许海明提醒你: 这里需要准备tfRecord")
logging.info("# 许海明提醒你: 这里需要准备tfRecord")
logging.info("# 许海明提醒你: 这里需要准备tfRecord")
logging.info("# 许海明提醒你: 这里需要准备tfRecord")


saveForTfRecord(hp.test,
                hp.maxlen_vae_Encoder,
                hp.maxlen_vae_Decoder_en,
                hp.maxlen_vae_Decoder_de,
                hp.vocab,
                output_file="./data/PAGE/test.tf_record",
コード例 #14
0
def train(hp):
    """Train a Transformer translation model end to end.

    Builds the train/eval input pipelines, restores the latest checkpoint
    (or initializes from scratch), then runs the step loop.  At the end of
    every epoch it: recomputes the loss on the last train batch with
    dropout disabled, decodes the eval set, writes hypotheses to
    ``hp.evaldir``, reports BLEU, saves a checkpoint, and applies a
    GL-based early-stopping test (Prechelt's generalization-loss
    criterion).

    Args:
        hp: parsed hyperparameter namespace (data paths, vocab, maxlens,
            batch size, num_epochs, checkpoints_dir, evaldir, ...).

    Fixes vs. the original: the iterator local no longer shadows the
    builtin ``iter``, and the running-minimum update uses ``min()``.
    """
    save_hparams(hp, hp.checkpoints_dir)
    # Data generator
    logging.info("Prepare Train/Eval batches...")
    train_batches, num_train_batches, num_train_samples = get_batch(
        hp.train1,
        hp.train2,
        hp.maxlen1,
        hp.maxlen2,
        hp.vocab,
        hp.batch_size,
        shuffle=True)
    # maxlen=10000 presumably disables length filtering for the eval set so
    # no eval sentence is dropped -- TODO confirm against get_batch.
    eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval1,
                                                                 hp.eval2,
                                                                 10000,
                                                                 10000,
                                                                 hp.vocab,
                                                                 hp.batch_size,
                                                                 shuffle=False)

    # One reinitializable iterator shared by both datasets; the init ops
    # below switch it between the train and eval streams.
    # (Renamed from `iter`, which shadowed the Python builtin.)
    data_iter = tf.data.Iterator.from_structure(train_batches.output_types,
                                                train_batches.output_shapes)
    xs, ys = data_iter.get_next()

    train_init_op = data_iter.make_initializer(train_batches)
    eval_init_op = data_iter.make_initializer(eval_batches)

    # Build model
    logging.info("Build model...")
    model = Transformer(hp)
    logging.info("Model is built!")

    # Session
    logging.info("Session initialize")
    saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        # Check & Load latest version model checkpoint
        ckpt = tf.train.latest_checkpoint(hp.checkpoints_dir)
        if ckpt is None:
            logging.info("Initializing from scratch")
            sess.run(tf.global_variables_initializer())
            save_variable_specs(os.path.join(hp.checkpoints_dir, "specs"))
        else:
            saver.restore(sess, ckpt)

        summary_writer = tf.summary.FileWriter(hp.checkpoints_dir, sess.graph)

        sess.run(train_init_op)
        total_steps = hp.num_epochs * num_train_batches
        _gs = sess.run(model.global_step)

        # Early-stopping state: GL criterion computed over the last `k`
        # eval losses; training stops once GL(t) exceeds `stop_alpha`.
        k = 5
        min_dev_loss = 0  # overwritten with the first eval loss below
        stop_alpha = 20.0
        eval_losses = []
        # Start training (resumes from the restored global step)
        for i in tqdm(range(_gs, total_steps + 1)):
            _input_x, _decoder_input, _target = sess.run([xs[0], ys[0], ys[1]])
            _, _gs, _summary = sess.run(
                [model.train_op, model.global_step, model.summaries],
                feed_dict={
                    model.input_x: _input_x,
                    model.decoder_input: _decoder_input,
                    model.target: _target,
                    model.is_training: True
                })
            epoch = math.ceil(_gs / num_train_batches)
            summary_writer.add_summary(_summary, _gs)

            # Evaluation at the end of every epoch
            if _gs and _gs % num_train_batches == 0:
                logging.info("Epoch {} is done".format(epoch))
                # Loss on the last train batch with is_training=False.
                _loss = sess.run(model.loss,
                                 feed_dict={
                                     model.input_x: _input_x,
                                     model.decoder_input: _decoder_input,
                                     model.target: _target,
                                     model.is_training: False
                                 })

                # evaluation (model.eval runs eval_init_op internally)
                y_hat, mean_loss = model.eval(sess, eval_init_op, xs, ys,
                                              num_eval_batches)

                # id to token
                logging.info("# Get hypotheses")
                hypotheses = get_hypotheses(num_eval_samples, y_hat,
                                            model.idx2token)

                # save translation results
                if not os.path.exists(hp.evaldir):
                    os.makedirs(hp.evaldir)
                logging.info("# Write results")
                model_output = "translation_E{:02d}L{:.2f}EL{:.2f}".format(
                    epoch, _loss, mean_loss)
                translation = os.path.join(hp.evaldir, model_output)
                with open(translation, 'w', encoding="utf-8") as fout:
                    fout.write("\n".join(hypotheses))
                logging.info(
                    "# Calculate bleu score and append it to translation")

                # bleu
                calc_bleu_nltk(hp.eval2, translation)

                # save model
                logging.info("# Save models")
                ckpt_name = os.path.join(hp.checkpoints_dir, model_output)
                saver.save(sess, ckpt_name, global_step=_gs)
                logging.info(
                    "After training of {} epochs, {} has been saved.".format(
                        epoch, ckpt_name))

                # calculate early stop
                if len(eval_losses) == 0:
                    min_dev_loss = mean_loss
                eval_losses.append(mean_loss)
                gl, p_k, pq_alpha = calculate_earlystop_baseline(
                    mean_loss, min_dev_loss, eval_losses, k)
                min_dev_loss = min(min_dev_loss, mean_loss)
                eval_losses = eval_losses[-k:]  # keep only the last k losses
                logging.info(
                    "GL(t): {:.4f}, P_k: {:.4f}, PQ_alpha: {:.4f}".format(
                        gl, p_k, pq_alpha))
                if gl > stop_alpha:
                    logging.info(
                        "No optimization for a long time, auto-stopping...")
                    break

                # change data iterator back to train iterator
                sess.run(train_init_op)

        summary_writer.close()

    logging.info("Done")
コード例 #15
0
ファイル: train.py プロジェクト: LittleRoommmm/transformer
from model import Transformer
from tqdm import tqdm
from data_load import get_batch
from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu
import os
from hparams import Hparams
import math
import logging

logging.basicConfig(level=logging.INFO)

# Parse command-line hyperparameters and persist them for reproducibility.
logging.info("# hparams")
hparams = Hparams()  # hyperparameter definitions / argparse parser
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.logdir)  # write hyperparameters to the log directory

# Build the training input pipeline (filtered to maxlen1/maxlen2, shuffled).
logging.info("# Prepare train/eval batches")
train_batches, num_train_batches, num_train_samples = get_batch(hp.train1,
                                                                hp.train2,
                                                                hp.maxlen1,
                                                                hp.maxlen2,
                                                                hp.vocab,
                                                                hp.batch_size,
                                                                shuffle=True)
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval1,
                                                             hp.eval2,
                                                             100000,
                                                             100000,
                                                             hp.vocab,
                                                             hp.batch_size,
コード例 #16
0
def main():
    """Entry point: parse CLI args, build the VOC/SBD (optionally COCO)
    datasets and dataloaders, load or create an FCN / DeepLabV3 model, and
    launch rotation-equivariance training (supervised CE loss plus an
    equivariance loss balanced by --gamma)."""
    #torch.manual_seed(42)
    # ------------
    # args
    # ------------
    parser = ArgumentParser()
    parser.add_argument('--auto_lr',
                        type=U.str2bool,
                        default=False,
                        help="Auto lr finder")
    parser.add_argument('--learning_rate', type=float, default=10e-4)
    parser.add_argument('--Loss', type=str, default='KL')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.5,
                        help="gamma balance the two losses")
    parser.add_argument('--scheduler', type=U.str2bool, default=True)
    parser.add_argument('--wd', type=float, default=2e-4)
    parser.add_argument('--moment', type=float, default=0.9)
    parser.add_argument('--batch_size', default=5, type=int)
    parser.add_argument('--iter_every',
                        default=1,
                        type=int,
                        help="Accumulate compute graph for iter_size step")
    parser.add_argument('--n_epochs', default=10, type=int)
    parser.add_argument('--model',
                        default='DLV3',
                        type=str,
                        help="FCN or DLV3 model")
    parser.add_argument('--pretrained',
                        default=False,
                        type=U.str2bool,
                        help="Use pretrained pytorch model")
    parser.add_argument('--eval_angle', default=True, type=U.str2bool,help=\
        "If true, it'll eval the model with different angle input size")
    parser.add_argument('--eval_every',
                        default=30,
                        type=int,
                        help="Eval all input rotation angle every n step")
    parser.add_argument('--rotate',
                        default=False,
                        type=U.str2bool,
                        help="Use random rotation as data augmentation")
    parser.add_argument('--angle_max',
                        default=30,
                        type=int,
                        help="Max angle rotation of input image")
    parser.add_argument('--size_img',
                        default=520,
                        type=int,
                        help="Size of input images")
    parser.add_argument('--size_crop',
                        default=480,
                        type=int,
                        help="Size of crop image during training")
    parser.add_argument('--nw',
                        default=0,
                        type=int,
                        help="Num workers for the data loader")
    parser.add_argument('--pm',
                        default=True,
                        type=U.str2bool,
                        help="Pin memory for the dataloader")
    parser.add_argument('--gpu',
                        default=0,
                        type=int,
                        help="Wich gpu to select for training")
    parser.add_argument(
        '--rot_cpu',
        default=False,
        type=U.str2bool,
        help="Apply rotation on the cpu (Help to use less gpu memory)")
    parser.add_argument('--benchmark',
                        default=False,
                        type=U.str2bool,
                        help="enable or disable backends.cudnn")
    parser.add_argument('--split',
                        default=True,
                        type=U.str2bool,
                        help="Split the dataset")
    parser.add_argument('--split_ratio',
                        default=0.3,
                        type=float,
                        help="Amount of data we used for training")
    parser.add_argument('--extra_coco', default=False, type=U.str2bool,\
         help="Use coco dataset as extra annotation for fully supervised training")
    parser.add_argument(
        '--multi_task',
        default=False,
        type=U.str2bool,
        help="Multi task training (same data for equiv and sup)")
    parser.add_argument('--dataroot_voc',
                        default='/share/DEEPLEARNING/datasets/voc2012',
                        type=str)
    parser.add_argument('--dataroot_sbd',
                        default='/share/DEEPLEARNING/datasets/sbd',
                        type=str)
    parser.add_argument('--dataroot_coco',
                        default='/share/DEEPLEARNING/datasets/coco',
                        type=str)
    parser.add_argument('--model_name',
                        type=str,
                        help="what name to use for saving")
    parser.add_argument('--save_dir', default='/data/save_model', type=str)
    parser.add_argument('--save_all_ep', default=False, type=U.str2bool,help=\
        "If true it'll save the model every epoch in save_dir")
    parser.add_argument('--save_best',
                        default=False,
                        type=U.str2bool,
                        help="If true will only save the best epoch model")
    parser.add_argument('--load_last_model',
                        default=False,
                        type=U.str2bool,
                        help="If it will load the last model saved with\
                                                                                    This parameters."
                        )
    args = parser.parse_args()
    # ------------
    # device
    # ------------
    device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")
    print("device used:", device)
    # ------------
    # model
    # ------------

    # ------------
    # data
    # ------------
    if args.size_img < args.size_crop:
        raise Exception(
            'Cannot have size of input images less than size of crop')
    size_img = (args.size_img, args.size_img)
    size_crop = (args.size_crop, args.size_crop)
    train_dataset_VOC = mdset.VOCSegmentation(args.dataroot_voc,year='2012', image_set='train', \
        download=True,rotate=args.rotate,size_img=size_img,size_crop=size_crop)
    val_dataset_VOC = mdset.VOCSegmentation(args.dataroot_voc,
                                            year='2012',
                                            image_set='val',
                                            download=True)
    train_dataset_SBD = mdset.SBDataset(args.dataroot_sbd, image_set='train_noval',mode='segmentation',\
        rotate=args.rotate,size_img=size_img,size_crop=size_crop)
    #COCO dataset
    if args.extra_coco:
        extra_COCO = cu.get_coco(args.dataroot_coco,
                                 'train',
                                 rotate=args.rotate,
                                 size_img=size_img,
                                 size_crop=size_crop)
    # Concatenate VOC + SBD; this is the pool used for the equivariance loss.
    train_dataset_unsup = tud.ConcatDataset(
        [train_dataset_VOC, train_dataset_SBD])

    # Split dataset: only split_ratio of the data is used with labels.
    split = args.split
    if split == True:
        train_dataset_sup = U.split_dataset(train_dataset_unsup,
                                            args.split_ratio)
    else:
        train_dataset_sup = train_dataset_unsup
    # Multi task: supervised and equivariance losses see the same data.
    if args.multi_task:
        train_dataset_unsup = train_dataset_sup

    # If extra coco concatene all dataset for unsupervised training
    # NOTE(review): this overrides the --multi_task assignment above when
    # both flags are set -- confirm that precedence is intended.
    if args.extra_coco:
        train_dataset_unsup = tud.ConcatDataset(
            [train_dataset_VOC, train_dataset_SBD, extra_COCO])

    # Print len datasets
    print("There is",len(train_dataset_sup),"images for supervised training",len(train_dataset_unsup),\
        "for equivariance loss and",len(val_dataset_VOC),"for validation")

    dataloader_train_sup = torch.utils.data.DataLoader(train_dataset_sup, batch_size=args.batch_size,num_workers=args.nw,\
        pin_memory=args.pm,shuffle=True,drop_last=True)
    dataloader_val = torch.utils.data.DataLoader(val_dataset_VOC,num_workers=args.nw,pin_memory=args.pm,\
        batch_size=args.batch_size)
    # ---------
    # Load model
    # ---------
    if args.load_last_model:
        # NOTE(review): this branch never calls model.to(device) here --
        # presumably load_best_model handles device placement; confirm.
        model,save_dir = fbm.load_best_model(save_dir=args.save_dir,model_name=args.model_name,split=args.split,\
            split_ratio=args.split_ratio,batch_size =args.batch_size,rotate=args.rotate)
        print("Training will continue from this file.", save_dir)
    else:
        save_dir = U.create_save_directory(
            args.save_dir)  # Create a new save directory
        if args.model.upper() == 'FCN':
            model = models.segmentation.fcn_resnet101(
                pretrained=args.pretrained)
        elif args.model.upper() == 'DLV3':
            print('DEEPLAB MODEL')
            model = models.segmentation.deeplabv3_resnet101(
                pretrained=args.pretrained)
        else:
            raise Exception('model must be "FCN" or "DLV3"')
        model.to(device)

    # ------------
    # save
    # ------------
    print('model will be saved in', save_dir)
    U.save_hparams(args, save_dir)
    # ------------
    # training
    # ------------
    # Auto lr finding
    #if args.auto_lr==True:

    criterion_supervised = nn.CrossEntropyLoss(
        ignore_index=21)  # ignore the border class (index 21)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.moment,
                                weight_decay=args.wd)
    ev.train_rot_equiv(model,args.n_epochs,dataloader_train_sup,train_dataset_unsup,dataloader_val,criterion_supervised,optimizer,\
        scheduler=args.scheduler,Loss=args.Loss,gamma=args.gamma,batch_size=args.batch_size,iter_every=args.iter_every,save_folder=save_dir,\
            model_name=args.model_name,benchmark=args.benchmark,angle_max=args.angle_max,size_img=args.size_img,\
        eval_every=args.eval_every,save_all_ep=args.save_all_ep,dataroot_voc=args.dataroot_voc,save_best=args.save_best\
            ,rot_cpu=args.rot_cpu,device=device)

    # Final evaluation
    """
コード例 #17
0
ファイル: templates.py プロジェクト: Traeyee/DickLearning
def train_template(class_model,
                   shuffle=True,
                   save_model=True):  # big datasets: turn off shuffle (slow); hyperparameter tuning: turn off save_model
    """Generic TF1 training driver.

    Parses hyperparameters, builds train/eval pipelines for the configured
    task, instantiates `class_model`, then runs the epoch loop: per-epoch
    eval summaries, optional checkpointing, and a final frozen-graph (.pb)
    export of the inference subgraph.

    Args:
        class_model: model class; must accept a Context and expose
            train()/eval()/get_inference_op_name().
        shuffle: whether to shuffle the training batches.
        save_model: whether to write checkpoints and the final .pb export.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    logging.info("# hparams")
    hparams = Hparams()
    parser = hparams.parser
    hp = parser.parse_args()
    run_type = hp.run_type
    logdir = hp.logdir
    batch_size = hp.batch_size
    num_epochs = hp.num_epochs
    task_type = hp.task_type
    assert hp.run_type in ("new", "continue", "finetune")
    if "continue" == hp.run_type:
        # Continuing a run: reload the saved hparams so they override the
        # CLI, and re-read batch_size/task_type from them.
        load_hparams(hp, logdir)
        batch_size = hp.batch_size
        if task_type is not None:
            assert task_type == hp.task_type
        task_type = hp.task_type
    assert task_type is not None
    context = Context(hp)
    logging.info("# Prepare train/eval batches")
    logging.info("Use %s for training set", hp.train_data)
    logging.info("Use %s for evaluation set", hp.eval_data)
    eval_batches, num_eval_batches, num_eval_samples = get_batch(
        fpath=hp.eval_data,
        task_type=task_type,
        input_indices=context.input_indices,
        vocabs=context.vocabs,
        context=context,
        batch_size=batch_size,
        shuffle=False)
    train_batches, num_train_batches, num_train_samples = get_batch(
        fpath=hp.train_data,
        task_type=task_type,
        input_indices=context.input_indices,
        vocabs=context.vocabs,
        context=context,
        batch_size=batch_size,
        shuffle=shuffle)

    # create a iterator of the correct shape and type
    iterr = tf.data.Iterator.from_structure(train_batches.output_types,
                                            train_batches.output_shapes)
    inputs_and_target = iterr.get_next()

    # Boilerplate (original note: author not yet familiar with these APIs).
    # The two init ops re-point the shared iterator at train or eval data.
    train_init_op = iterr.make_initializer(train_batches)
    eval_init_op = iterr.make_initializer(eval_batches)
    model = class_model(context)
    # Convention: the last element of the batch tuple is the target.
    loss, train_op, global_step, train_summaries = model.train(
        inputs=inputs_and_target[:-1], targets=inputs_and_target[-1])
    eval_ouputs, eval_summaries = model.eval(inputs=inputs_and_target[:-1],
                                             targets=inputs_and_target[-1])
    inference_name = model.get_inference_op_name()
    logging.info("inference_node_name:%s" % inference_name)

    logging.info("# Session")
    saver = tf.train.Saver(max_to_keep=num_epochs)
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        time_sess = time.time()
        ckpt = tf.train.latest_checkpoint(logdir)
        if ckpt is None or "new" == run_type:  # fresh run
            save_hparams(hp, logdir)
            logging.info("Initializing from scratch")
            sess.run(tf.global_variables_initializer())
        else:  # continue OR finetune
            saver.restore(sess, ckpt)
            if "finetune" == hp.run_type:  # finetune
                save_hparams(hp, logdir)

        save_variable_specs(os.path.join(logdir, "var_specs"))
        save_operation_specs(os.path.join(logdir, "op_specs"))
        f_debug = open(os.path.join(logdir, "debug.txt"), "a")
        summary_writer = tf.summary.FileWriter(logdir, sess.graph)
        if hp.zero_step:
            sess.run(global_step.assign(0))

        sess.run(train_init_op)
        total_steps = num_epochs * num_train_batches
        logging.info("total_steps:%s, num_epochs:%s, num_train_batches:%s",
                     total_steps, num_epochs, num_train_batches)
        _gs = sess.run(global_step)
        logging.info("global_step is stated at %s", _gs)
        t_epoch = time.time()
        model_output = 'default'
        # Resume the step loop from the restored global step.
        for i in tqdm(range(_gs, total_steps + 1)):
            ts = time.time()
            # f_debug.write("loss\n")
            # tensor_tmp = tf.get_default_graph().get_tensor_by_name("loss:0")
            # np.savetxt(f_debug, tensor_tmp.eval().reshape([1]), delimiter=', ', footer="=" * 64)
            _, _gs, _summary = sess.run(
                [train_op, global_step, train_summaries])
            epoch = math.ceil(_gs / num_train_batches)
            f_debug.write("train: epoch %s takes %s\n" %
                          (epoch, time.time() - ts))
            summary_writer.add_summary(_summary, _gs)

            # End of an epoch: evaluate and optionally checkpoint.
            if _gs and _gs % num_train_batches == 0:
                logging.info("epoch {} is done".format(epoch))

                # train loss
                _loss = sess.run(loss)
                # eval (running eval_init_op switches the iterator to eval data)
                logging.info("# eval evaluation")
                _, _eval_summaries = sess.run([eval_init_op, eval_summaries])
                summary_writer.add_summary(_eval_summaries, _gs)
                if save_model:
                    # save checkpoint
                    logging.info("# save models")
                    model_output = "model%02dL%.2f" % (epoch, _loss)
                    ckpt_name = os.path.join(logdir, model_output)
                    saver.save(sess, ckpt_name, global_step=_gs)
                    logging.info(
                        "after training of {} epochs, {} has been saved.".
                        format(epoch, ckpt_name))
                # proceed to next epoch
                logging.info("# fall back to train mode")
                ts = time.time()
                sess.run(train_init_op)
                logging.info("fallback_train: %s\t%s\t%s takes %s" %
                             (i, _gs, epoch, time.time() - ts))
                logging.info("epoch %s takes %s", epoch, time.time() - t_epoch)
                t_epoch = time.time()
        summary_writer.close()
        logging.info("Session runs for %s", time.time() - time_sess)
        if save_model:
            # save to pb: freeze variables and export only the inference
            # subgraph ("name:0" -> node name by stripping the output index).
            inference_node_name = inference_name[:inference_name.find(":")]
            graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names=[inference_node_name])
            tf.train.write_graph(graph_def,
                                 logdir,
                                 '%s.pb' % model_output,
                                 as_text=False)
    f_debug.close()
    logging.info("Done")
コード例 #18
0
from tqdm import tqdm
from data_load import get_batch
from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu
import os
from hparams import Hparams
import math
import logging

# NOTE(review): tf.device(...) outside a `with` block is a no-op -- it does
# not pin subsequent ops to /gpu:3; confirm the intended device placement.
tf.device('/gpu:3')
logging.basicConfig(level=logging.INFO)

# Parse command-line hyperparameters and persist them for reproducibility.
logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.logdir)  # save the hyperparameter settings

# Build the training input pipeline (filtered to maxlen1/maxlen2, shuffled).
logging.info("# Prepare train/eval batches")
train_batches, num_train_batches, num_train_samples = get_batch(hp.train1,
                                                                hp.train2,
                                                                hp.maxlen1,
                                                                hp.maxlen2,
                                                                hp.vocab,
                                                                hp.batch_size,
                                                                shuffle=True)
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval1,
                                                             hp.eval2,
                                                             100000,
                                                             100000,
                                                             hp.vocab,
                                                             hp.batch_size,
コード例 #19
0
def main():
    """Entry point: parse CLI args, create the save directory, build an
    FCN / DeepLabV3 model and the VOC+SBD dataloaders, run fully supervised
    training, and optionally evaluate IoU across input rotation angles."""
    #torch.manual_seed(42)

    # ------------
    # args
    # ------------
    parser = ArgumentParser()
    parser.add_argument('--auto_lr',
                        type=U.str2bool,
                        default=False,
                        help="Auto lr finder")
    parser.add_argument('--learning_rate', type=float, default=10e-4)
    parser.add_argument('--scheduler', type=U.str2bool, default=False)
    parser.add_argument('--wd', type=float, default=2e-4)
    parser.add_argument('--moment', type=float, default=0.9)
    parser.add_argument('--batch_size', default=5, type=int)
    parser.add_argument('--n_epochs', default=10, type=int)
    parser.add_argument('--model',
                        default='FCN',
                        type=str,
                        help="FCN or DLV3 model")
    parser.add_argument('--pretrained',
                        default=False,
                        type=U.str2bool,
                        help="Use pretrained pytorch model")
    parser.add_argument('--eval_angle', default=True, type=U.str2bool,help=\
        "If true, it'll eval the model with different angle input size")
    parser.add_argument('--rotate',
                        default=False,
                        type=U.str2bool,
                        help="Use random rotation as data augmentation")
    parser.add_argument('--scale',
                        default=True,
                        type=U.str2bool,
                        help="Use scale as data augmentation")
    parser.add_argument('--size_img',
                        default=520,
                        type=int,
                        help="Size of input images")
    parser.add_argument('--size_crop',
                        default=480,
                        type=int,
                        help="Size of crop image during training")
    parser.add_argument('--nw',
                        default=0,
                        type=int,
                        help="Num workers for the data loader")
    parser.add_argument('--pm',
                        default=True,
                        type=U.str2bool,
                        help="Pin memory for the dataloader")
    parser.add_argument('--gpu',
                        default=0,
                        type=int,
                        help="Wich gpu to select for training")
    parser.add_argument('--benchmark',
                        default=False,
                        type=U.str2bool,
                        help="enable or disable backends.cudnn")
    parser.add_argument('--split',
                        default=False,
                        type=U.str2bool,
                        help="Split the dataset")
    parser.add_argument('--split_ratio',
                        default=0.3,
                        type=float,
                        help="Amount of data we used for training")
    parser.add_argument('--dataroot_voc',
                        default='/share/DEEPLEARNING/datasets/voc2012/',
                        type=str)
    parser.add_argument('--dataroot_sbd',
                        default='/share/DEEPLEARNING/datasets/sbd/',
                        type=str)
    parser.add_argument('--model_name',
                        type=str,
                        help="what name to use for saving")
    parser.add_argument('--save_dir', default='/data/save_model', type=str)
    parser.add_argument('--save_all_ep', default=False, type=U.str2bool,help=\
        "If true it'll save the model every epoch in save_dir")
    parser.add_argument('--save_best',
                        default=False,
                        type=U.str2bool,
                        help="If true will only save the best epoch model")
    args = parser.parse_args()
    # ------------
    # save
    # ------------
    save_dir = U.create_save_directory(args.save_dir)
    print('model will be saved in', save_dir)
    U.save_hparams(args, save_dir)
    # ------------
    # device
    # ------------
    device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")
    print("device used:", device)
    # ------------
    # model
    # ------------

    if args.model.upper() == 'FCN':
        model = models.segmentation.fcn_resnet101(pretrained=args.pretrained)
    elif args.model.upper() == 'DLV3':
        model = models.segmentation.deeplabv3_resnet101(
            pretrained=args.pretrained)
    else:
        raise Exception('model must be "FCN" or "DLV3"')
    model.to(device)
    # ------------
    # data
    # ------------
    if args.size_img < args.size_crop:
        raise Exception(
            'Cannot have size of input images less than size of crop')
    size_img = (args.size_img, args.size_img)
    size_crop = (args.size_crop, args.size_crop)
    train_dataset_VOC = mdset.VOCSegmentation(args.dataroot_voc,year='2012', image_set='train', \
        download=True,rotate=args.rotate,scale=args.scale,size_img=size_img,size_crop=size_crop)
    val_dataset_VOC = mdset.VOCSegmentation(args.dataroot_voc,
                                            year='2012',
                                            image_set='val',
                                            download=True)
    train_dataset_SBD = mdset.SBDataset(args.dataroot_sbd, image_set='train_noval',mode='segmentation',\
        rotate=args.rotate,scale=args.scale,size_img=size_img,size_crop=size_crop)
    # Concatenate VOC + SBD into one training pool.
    train_dataset = tud.ConcatDataset([train_dataset_VOC, train_dataset_SBD])
    # Optionally train on only split_ratio of the pool.
    split = args.split
    if split == True:
        train_dataset = U.split_dataset(train_dataset, args.split_ratio)
    # Print len datasets
    print("There is", len(train_dataset), "images for training and",
          len(val_dataset_VOC), "for validation")
    dataloader_train = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,num_workers=args.nw,\
        pin_memory=args.pm,shuffle=True,drop_last=True)#,collate_fn=U.my_collate)
    dataloader_val = torch.utils.data.DataLoader(val_dataset_VOC,num_workers=args.nw,pin_memory=args.pm,\
        batch_size=args.batch_size)
    # Decide which device we want to run on

    # ------------
    # training
    # ------------
    # Auto lr finding
    #if args.auto_lr==True:

    criterion = nn.CrossEntropyLoss(
        ignore_index=21)  # ignore the border class (index 21)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.moment,
                                weight_decay=args.wd)
    ev.train_fully_supervised(model=model,n_epochs=args.n_epochs,train_loader=dataloader_train,val_loader=dataloader_val,\
        criterion=criterion,optimizer=optimizer,save_folder=save_dir,scheduler=args.scheduler,model_name=args.model_name,\
            benchmark=args.benchmark, save_best=args.save_best,save_all_ep=args.save_all_ep,device=device,num_classes=21)

    # Final evaluation: per-angle IoU on both train and val splits.
    if args.eval_angle:
        d_iou = ev.eval_model_all_angle(model,
                                        args.size_img,
                                        args.dataroot_voc,
                                        train=True,
                                        device=device)
        U.save_eval_angle(d_iou, save_dir)
        d_iou = ev.eval_model_all_angle(model,
                                        args.size_img,
                                        args.dataroot_voc,
                                        train=False,
                                        device=device)
        U.save_eval_angle(d_iou, save_dir)
コード例 #20
0
 def __init__(self):
     """Parse the command-line hyperparameters, persist them to the log
     directory, and keep the parsed namespace on ``self.hp``."""
     parsed_args = Hparams().parser.parse_args()
     save_hparams(parsed_args, parsed_args.logdir)
     self.hp = parsed_args
コード例 #21
0
def run_training(args):
    """Train the image-transform network for real-time style transfer.

    Builds the transform network and the perceptual loss network, restores
    the latest checkpoint when one exists, then runs the training loop:
    every ``args.ckpt_interval`` steps it saves a checkpoint and logs losses
    plus sample stylized images to TensorBoard.

    Args:
        args: parsed CLI namespace; uses ``name``, ``style_img``,
            ``max_ckpt_to_keep`` and ``ckpt_interval`` (plus whatever
            ``create_ds``/``create_test_batch`` read from it).
    """
    it_network = ImageTransformNet(
        input_shape=hparams['input_size'],
        residual_layers=hparams['residual_layers'],
        residual_filters=hparams['residual_filters'],
        initializer=hparams['initializer'])
    loss_network = LossNetwork(hparams['style_layers'])

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=hparams['learning_rate'])
    # Wrap for mixed precision: the loss/gradients must be scaled and
    # unscaled manually in train_step below.
    optimizer = mixed_precision.LossScaleOptimizer(optimizer)

    ckpt_dir = os.path.join(args.name, 'pretrained')
    ckpt = tf.train.Checkpoint(network=it_network,
                               optimizer=optimizer,
                               step=tf.Variable(0))
    ckpt_manager = tf.train.CheckpointManager(
        ckpt, directory=ckpt_dir, max_to_keep=args.max_ckpt_to_keep)

    # No-op when there is no checkpoint yet (latest_checkpoint is None).
    ckpt.restore(ckpt_manager.latest_checkpoint)
    log_dir = os.path.join(args.name, 'log_dir')
    writer = tf.summary.create_file_writer(logdir=log_dir)

    print('\n####################################################')
    print('Perceptual Losses for Real-Time Style Transfer Train')
    print('####################################################\n')
    if ckpt_manager.latest_checkpoint:
        print('Restored {} from: {}'.format(args.name,
                                            ckpt_manager.latest_checkpoint))
    else:
        print('Initializing {} from scratch'.format(args.name))
    print('Style image: {}'.format(args.style_img))
    print('Start TensorBoard with: $ tensorboard --logdir ./\n')

    total_loss_avg = tf.keras.metrics.Mean()
    style_loss_avg = tf.keras.metrics.Mean()
    content_loss_avg = tf.keras.metrics.Mean()

    # Persist hyperparameters exactly once per run.  (Fix: the original
    # also called save_hparams() inside the from-scratch branch above,
    # writing the same file twice.)
    save_hparams(args.name)

    style_img = convert(args.style_img)
    # Precompute the style targets: Gram matrices of the style image's
    # feature maps, reused unchanged for every training batch.
    target_feature_maps = loss_network(style_img[tf.newaxis, :])
    target_gram_matrices = [gram_matrix(x) for x in target_feature_maps]
    num_style_layers = len(target_feature_maps)

    dataset = create_ds(args)
    test_content_batch = create_test_batch(args)

    @tf.function
    def test_step(batch):
        # Inference only; deprocess converts raw network output back to
        # displayable image values.
        prediction = it_network(batch, training=False)
        return deprocess(prediction)

    @tf.function
    def train_step(batch):
        with tf.GradientTape() as tape:
            output_batch = it_network(batch, training=True)
            output_batch = 255 * (output_batch + 1.0) / 2.0  # float deprocess

            # Feed target and output batch through loss_network.
            target_batch_feature_maps = loss_network(batch)
            output_batch_feature_maps = loss_network(output_batch)

            c_loss = content_loss(
                target_batch_feature_maps[hparams['content_layer_index']],
                output_batch_feature_maps[hparams['content_layer_index']])
            c_loss *= hparams['content_weight']

            # Style loss compares Gram matrices, averaged over style layers.
            output_gram_matrices = [
                gram_matrix(x) for x in output_batch_feature_maps
            ]
            s_loss = style_loss(target_gram_matrices, output_gram_matrices)
            s_loss *= hparams['style_weight'] / num_style_layers

            total_loss = c_loss + s_loss
            # Scale the loss so small gradients survive reduced precision.
            scaled_loss = optimizer.get_scaled_loss(total_loss)

        scaled_gradients = tape.gradient(scaled_loss,
                                         it_network.trainable_variables)
        gradients = optimizer.get_unscaled_gradients(scaled_gradients)
        optimizer.apply_gradients(
            zip(gradients, it_network.trainable_variables))

        total_loss_avg(total_loss)
        content_loss_avg(c_loss)
        style_loss_avg(s_loss)

    total_start = time.time()
    for batch_image in dataset:
        start = time.time()
        train_step(batch_image)

        ckpt.step.assign_add(1)
        step_int = int(ckpt.step)  # cast ckpt.step to a plain int

        if step_int % args.ckpt_interval == 0:
            print('Time taken for step {} is {} sec'.format(
                step_int,
                time.time() - start))
            ckpt_manager.save(step_int)
            prediction_norm = test_step(test_content_batch)

            with writer.as_default():
                tf.summary.scalar('total loss',
                                  total_loss_avg.result(),
                                  step=step_int)
                tf.summary.scalar('content loss',
                                  content_loss_avg.result(),
                                  step=step_int)
                tf.summary.scalar('style loss',
                                  style_loss_avg.result(),
                                  step=step_int)
                images = np.reshape(prediction_norm,
                                    (-1, hparams['input_size'][0],
                                     hparams['input_size'][1], 3))
                tf.summary.image('generated image',
                                 images,
                                 step=step_int,
                                 max_outputs=len(test_content_batch))

            print('Total loss: {:.4f}'.format(total_loss_avg.result()))
            print('Content loss: {:.4f}'.format(content_loss_avg.result()))
            print('Style loss: {:.4f}'.format(style_loss_avg.result()))
            print('Total time: {} sec\n'.format(time.time() - total_start))
            # Reset running means so each interval reports fresh averages.
            total_loss_avg.reset_states()
            content_loss_avg.reset_states()
            style_loss_avg.reset_states()
コード例 #22
0
ファイル: train.py プロジェクト: Traeyee/DickLearning
# Pull the next (input1, input2, label) tensors from the dataset iterator.
x1, x2, score = iterr.get_next()

# Copied verbatim from reference code; not yet fully familiar with these APIs.
train_init_op = iterr.make_initializer(train_batches)

# Build the DSSM model and its training graph from the paired inputs.
model = DSSM(context)
loss, train_op, global_step, train_summaries = model.train(x1, x2, score)

logging.info("# Session")
# Keep one checkpoint per epoch.
saver = tf.train.Saver(max_to_keep=num_epochs)
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    time_sess = time.time()
    ckpt = tf.train.latest_checkpoint(logdir)
    if ckpt is None or "new" == run_type:  # fresh run
        save_hparams(hp, logdir)
        logging.info("Initializing from scratch")
        sess.run(tf.global_variables_initializer())
    else:  # continue OR finetune
        saver.restore(sess, ckpt)
        if "finetune" == hp.run_type:  # finetune: re-save hparams for the new run
            save_hparams(hp, logdir)
        else:  # continue
            batch_size = hp.batch_size

    # Dump variable/operation inventories for later debugging.
    save_variable_specs(os.path.join(logdir, "var_specs"))
    save_operation_specs(os.path.join(logdir, "op_specs"))
    f_debug = open(os.path.join(logdir, "debug.txt"), "a")
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)
    if hp.zero_step:
        # Optionally restart the step counter even when weights were restored.
        sess.run(global_step.assign(0))
コード例 #23
0
from tqdm import tqdm
from data_load import get_batch
from utils import save_hparams, save_variable_specs, get_hypotheses, calc_bleu
import os
from hparams import Hparams
import math
import logging

logging.basicConfig(level=logging.INFO)
# Pin the process to a single GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = "5"

logging.info("# hparams")
# Parse command-line hyperparameters and persist them to the log directory.
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")
# Training batches built from the parallel source/target files, capped at
# maxlen1/maxlen2 tokens and shuffled each epoch.
train_batches, num_train_batches, num_train_samples = get_batch(hp.train1,
                                                                hp.train2,
                                                                hp.maxlen1,
                                                                hp.maxlen2,
                                                                hp.vocab,
                                                                hp.batch_size,
                                                                shuffle=True)
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval1,
                                                             hp.eval2,
                                                             100000,
                                                             100000,
                                                             hp.vocab,
                                                             hp.batch_size,
コード例 #24
0
def main():
    """Entry point: semi-supervised scale-equivariance training on VOC/SBD.

    Parses CLI arguments, builds the segmentation model (FCN or DeepLabV3),
    assembles the supervised/unsupervised datasets, then trains with a
    supervised cross-entropy loss plus a scale-equivariance loss.
    """
    #torch.manual_seed(42)
    # ------------
    # args
    # ------------
    parser = ArgumentParser()
    parser.add_argument('--auto_lr',
                        type=U.str2bool,
                        default=False,
                        help="Auto lr finder")
    parser.add_argument('--learning_rate', type=float, default=10e-4)
    parser.add_argument('--Loss', type=str, default='KL')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.5,
                        help="gamma balance the two losses")
    parser.add_argument('--scheduler', type=U.str2bool, default=False)
    parser.add_argument('--wd', type=float, default=2e-4)
    parser.add_argument('--moment', type=float, default=0.9)
    parser.add_argument('--batch_size', default=5, type=int)
    parser.add_argument('--n_epochs', default=10, type=int)
    parser.add_argument('--model',
                        default='FCN',
                        type=str,
                        help="FCN or DLV3 model")
    parser.add_argument('--pretrained',
                        default=False,
                        type=U.str2bool,
                        help="Use pretrained pytorch model")
    parser.add_argument('--eval_every',
                        default=30,
                        type=int,
                        help="Eval all input rotation angle every n step")
    parser.add_argument('--rotate',
                        default=False,
                        type=U.str2bool,
                        help="Use random rotation as data augmentation")
    parser.add_argument('--scale',
                        default=True,
                        type=U.str2bool,
                        help="Use scale as data augmentation")
    parser.add_argument('--scale_factor',
                        default=30,
                        type=float,
                        nargs='+',
                        help="Scale image between min*size - max*size")
    parser.add_argument('--size_img',
                        default=520,
                        type=int,
                        help="Size of input images")
    parser.add_argument('--size_crop',
                        default=480,
                        type=int,
                        help="Size of crop image during training")
    parser.add_argument('--nw',
                        default=0,
                        type=int,
                        help="Num workers for the data loader")
    parser.add_argument('--pm',
                        default=True,
                        type=U.str2bool,
                        help="Pin memory for the dataloader")
    parser.add_argument('--gpu',
                        default=0,
                        type=int,
                        help="Wich gpu to select for training")
    parser.add_argument('--benchmark',
                        default=False,
                        type=U.str2bool,
                        help="enable or disable backends.cudnn")
    parser.add_argument('--split',
                        default=True,
                        type=U.str2bool,
                        help="Split the dataset")
    parser.add_argument('--split_ratio',
                        default=0.3,
                        type=float,
                        help="Amount of data we used for training")
    parser.add_argument(
        '--multi_task',
        default=False,
        type=U.str2bool,
        help="Multi task training (same data for equiv and sup)")
    parser.add_argument('--dataroot_voc', default='/data/voc2012', type=str)
    parser.add_argument('--dataroot_sbd', default='/data/sbd', type=str)
    parser.add_argument('--model_name',
                        type=str,
                        help="what name to use for saving")
    parser.add_argument('--save_dir', default='/data/save_model', type=str)
    parser.add_argument('--save_all_ep', default=False, type=U.str2bool,help=\
        "If true it'll save the model every epoch in save_dir")
    parser.add_argument('--save_best',
                        default=False,
                        type=U.str2bool,
                        help="If true will only save the best epoch model")
    args = parser.parse_args()
    # ------------
    # save
    # ------------
    save_dir = U.create_save_directory(args.save_dir)
    print('model will be saved in', save_dir)
    # Persist the run configuration alongside the checkpoints.
    U.save_hparams(args, save_dir)
    # ------------
    # device
    # ------------
    device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")
    print("device used:", device)
    # ------------
    # model
    # ------------
    if args.model.upper() == 'FCN':
        model = models.segmentation.fcn_resnet101(pretrained=args.pretrained)
    elif args.model.upper() == 'DLV3':
        model = models.segmentation.deeplabv3_resnet101(
            pretrained=args.pretrained)
    else:
        raise Exception('model must be "FCN" or "DLV3"')
    model.to(device)
    # ------------
    # data
    # ------------
    if args.size_img < args.size_crop:
        raise Exception(
            'Cannot have size of input images less than size of crop')
    size_img = (args.size_img, args.size_img)
    size_crop = (args.size_crop, args.size_crop)
    train_dataset_VOC = mdset.VOCSegmentation(args.dataroot_voc,year='2012', image_set='train', \
        download=True,rotate=args.rotate,scale=args.scale,size_img=size_img,size_crop=size_crop)
    val_dataset_VOC = mdset.VOCSegmentation(args.dataroot_voc,
                                            year='2012',
                                            image_set='val',
                                            download=True)
    train_dataset_SBD = mdset.SBDataset(args.dataroot_sbd, image_set='train_noval',mode='segmentation',\
        rotate=args.rotate,scale=args.scale,size_img=size_img,size_crop=size_crop)
    # Concatenate VOC and SBD for the unsupervised (equivariance) set.
    train_dataset_unsup = tud.ConcatDataset(
        [train_dataset_VOC, train_dataset_SBD])

    # Optionally keep only a fraction of the data for supervised training.
    if args.split:
        train_dataset_sup = U.split_dataset(train_dataset_unsup,
                                            args.split_ratio)
    else:
        # Bug fix: previously train_dataset_sup was left unassigned when
        # --split was False, raising a NameError below.  Fall back to the
        # full dataset for supervised training.
        train_dataset_sup = train_dataset_unsup
    # Multi-task: use the same (supervised) data for the equivariance loss.
    if args.multi_task:
        train_dataset_unsup = train_dataset_sup
    # Print len datasets
    print("There is",len(train_dataset_sup),"images for supervised training",len(train_dataset_unsup),\
        "for equivariance loss and",len(val_dataset_VOC),"for validation")

    dataloader_train_sup = torch.utils.data.DataLoader(train_dataset_sup, batch_size=args.batch_size,num_workers=args.nw,\
        pin_memory=args.pm,shuffle=True,drop_last=True)
    dataloader_val = torch.utils.data.DataLoader(val_dataset_VOC,num_workers=args.nw,pin_memory=args.pm,\
        batch_size=args.batch_size)

    # ------------
    # training
    # ------------
    # Auto lr finding
    #if args.auto_lr==True:
    # NOTE(review): this hard-coded range overrides the --scale_factor
    # CLI argument — confirm this is intentional.
    scale_factor = (0.2, 0.8)
    criterion_supervised = nn.CrossEntropyLoss(
        ignore_index=21)  # index 21 is the border class, excluded from the loss
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.moment,
                                weight_decay=args.wd)
    ev.train_scale_equiv(model,args.n_epochs,dataloader_train_sup,train_dataset_unsup,dataloader_val,criterion_supervised,optimizer,\
        scheduler=args.scheduler,Loss=args.Loss,gamma=args.gamma,batch_size=args.batch_size,save_folder=save_dir,\
            model_name=args.model_name,benchmark=args.benchmark,scale_factor = scale_factor,\
                size_img=args.size_img,save_all_ep=args.save_all_ep,dataroot_voc=args.dataroot_voc,\
                    save_best=args.save_best,device=device)