Beispiel #1
0
def get_ng_and_semiprog():
    """Compute net-precipitation/precipitable-water diagnostics.

    Returns:
        tuple: ``(ng, semiprog)`` — two ``xr.Dataset`` objects containing
        the column-integrated net precipitation and precipitable water,
        first diagnosed from the apparent moisture source (``ng``), then
        from the neural network's predicted source (``semiprog``).
    """
    # open model
    model = common.get_model('NN-Lower')
    # get data
    ds = open_data('training').sel(time=slice(100,115))

    def integrate_moist(src):
        # Mass-weighted column integral; /1000 converts to mm-equivalent.
        # NOTE: closes over `ds`, so `ds` must not be rebound while this
        # helper is still in use.
        return (src * ds.layer_mass).sum('z')/1000

    q2 = compute_apparent_source(ds.QT, 86400 * ds.FQT)

    ng = xr.Dataset({
        netprec_name: -integrate_moist(q2),
        pw_name: integrate_moist(ds.QT)
    })

    # semiprognostic: same diagnostics, but from the NN-predicted source.
    # Fix: bind the result to a new name instead of rebinding `ds`, which
    # the `integrate_moist` closure reads — the original rebinding only
    # worked by accident of evaluation order.
    predicted_srcs = model.predict(ds)
    semiprog = xr.Dataset({
        netprec_name: -integrate_moist(predicted_srcs['QT']),
        pw_name: integrate_moist(ds.QT)
    })

    return ng, semiprog
Beispiel #2
0
    def __init__(self, config_path):
        """Read the config, load the training images and build the model.

        Args:
            config_path: path to the configuration file consumed by
                ``common.Config``.
        """
        # Read the configuration values.
        print('Reading config file...')
        self.config = common.Config(config_path)

        # Read the training image files.
        print('Reading image files...')
        image_reader = ImageReader(self.config.image_width,
                                   self.config.image_height)

        positive_samples = image_reader.read_from(
            self.config.positive_data_path)
        negative_samples = image_reader.read_from(
            self.config.negative_data_path)

        # Convert the samples into training-data form.
        print('Preparing training files...')
        self.train_data = TrainData(positive_samples, negative_samples)

        print('# of images = {} positives vs. {} negatives'.format(
            self.train_data.positive_count, self.train_data.negative_count))

        # Build the training model.
        print('Building training model...')
        # presumably (width, height, channels) with 3 = RGB — TODO confirm
        shape = (self.config.image_width, self.config.image_height, 3)
        self.model = common.get_model(shape, self.config.model_type)
Beispiel #3
0
    def __init__(self, config_path):
        """Load trained_model.

        Reads the configuration, rebuilds the model architecture and
        loads the trained weights from ``config.model_path``.
        """

        # Read the configuration values.
        print('Reading config file...')
        self.config = common.Config(config_path)

        # Rebuild the model and load the trained weights.
        shape = (self.config.image_width, self.config.image_height, 3)
        self.model = common.get_model(shape, self.config.model_type)
        self.model.load_weights(self.config.model_path)
Beispiel #4
0
 def execute(cls, args: Args):
     """Time `model.predict` over the configured number of iterations.

     Prints per-iteration timings (in microseconds) when requested, and
     always prints the cumulative time at the end.
     """
     spec = _extract_model_spec(args)
     net = get_model(args.model, spec.get_model)
     total = 0.0
     for idx in range(args.num_iterations):
         begin = time()
         net.predict(x=spec.xs, verbose=0)
         elapsed = time() - begin
         if args.iteration_time:
             print(f'Iteration {idx} time: {elapsed * 1e6}')
         total += elapsed
     print(
         f'Keras performance for model `{spec.name}` : {total * 1e6}'
     )
Beispiel #5
0
def val():
    """Validation.

    Builds the model, optionally restores pretrained weights (and EMA
    state), constructs the test data loaders and runs one test pass via
    ``validate``.
    """
    torch.backends.cudnn.benchmark = True

    # model
    model, model_wrapper = mc.get_model()
    ema = mc.setup_ema(model)
    criterion = torch.nn.CrossEntropyLoss(reduction='none').cuda()
    # TODO(meijieru): cal loss on all GPUs instead only `cuda:0` when non
    # distributed

    # check pretrained
    if FLAGS.pretrained:
        # map_location keeps tensors on CPU until explicitly moved.
        checkpoint = torch.load(FLAGS.pretrained,
                                map_location=lambda storage, loc: storage)
        if ema:
            ema.load_state_dict(checkpoint['ema'])
            ema.to(get_device(model))
        model_wrapper.load_state_dict(checkpoint['model'])
        logging.info('Loaded model {}.'.format(FLAGS.pretrained))

    if udist.is_master():
        logging.info(model_wrapper)

    # data
    (train_transforms, val_transforms, test_transforms) = \
        dataflow.data_transforms(FLAGS)
    (train_set, val_set, test_set) = dataflow.dataset(train_transforms,
                                                      val_transforms,
                                                      test_transforms, FLAGS)
    # Only the calibration and test loaders are needed here.
    _, calib_loader, _, test_loader = dataflow.data_loader(
        train_set, val_set, test_set, FLAGS)

    if udist.is_master():
        logging.info('Start testing.')
    FLAGS._global_step = 0
    test_meters = mc.get_meters('test')
    validate(0, calib_loader, test_loader, criterion, test_meters,
             model_wrapper, ema, 'test')
    return
Beispiel #6
0
def get_data():
    """Evaluate the NN on the training data and collect source budgets.

    Returns an ``xr.Dataset`` holding the column-integrated moisture and
    heat budgets (``net_moist*``/``net_heat*``) together with the full
    diagnosed (Q1/Q2) and network-predicted (Q1nn/Q2nn) source fields.
    """
    model = common.get_model('NN-Lower')
    ds = (open_data('training')
          .chunk({'time': 1})
          .pipe(assign_apparent_sources))

    # Run the network one time slice at a time and store its predicted
    # sources next to the diagnosed apparent sources.
    predictions = map_dataset(ds, model.call_with_xr, 'time')
    for name in predictions:
        ds['F' + name + 'NN'] = predictions[name]

    fields = [
        ('net_moist_nn', integrate_q2(ds['FQTNN'], ds.layer_mass)),
        ('net_heat_nn', integrate_q1(ds['FSLINN'], ds.layer_mass)),
        ('net_moist', integrate_q2(ds['Q2'], ds.layer_mass)),
        ('net_heat', integrate_q1(ds['Q1'], ds.layer_mass)),
        ('Q1', ds['Q1']),
        ('Q2', ds['Q2']),
        ('Q1nn', ds['FSLINN']),
        ('Q2nn', ds['FQTNN']),
    ]
    output = xr.Dataset()
    for name, field in fields:
        output[name] = field

    return output
Beispiel #7
0
                                    # collate_fn=collate_fn
                                    )

    valid_triplets_loader = DataLoader(weak_valid_dl_triplet, batch_size=batch_size, shuffle=False,
                                       num_workers=cfg.num_workers,
                                       drop_last=True, collate_fn=collate_fn)

    test_triplets_loader = DataLoader(test_triplets, batch_size=batch_size, shuffle=False,
                                      num_workers=cfg.num_workers,
                                      drop_last=True, collate_fn=collate_fn)

    # #########
    # # Model and optimizer
    # ########
    if resume_training is None:
        model_triplet, state = get_model(state, f_args)
        optimizer, state = get_optimizer(model_triplet, state)

    LOG.info(model_triplet)
    pytorch_total_params = sum(p.numel() for p in model_triplet.parameters() if p.requires_grad)
    LOG.info("number of parameters in the model: {}".format(pytorch_total_params))
    model_triplet.train()
    # scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5, verbose=True)
    LOG.info(optimizer)
    model_triplet = to_cuda_if_available(model_triplet)

    # ##########
    # # Callbacks
    # ##########
    if cfg.save_best:
        save_best_call = SaveBest(val_comp="sup")
Beispiel #8
0
def train_val_test():
    """Train and val.

    End-to-end driver: builds model/criteria, optionally loads pretrained
    weights or resumes from a checkpoint, sets up data, pruning state and
    schedulers, then runs the epoch loop with periodic validation, model
    shrinking and checkpointing. Supports classification, segmentation
    (cityscapes/ade20k) and coco keypoint tasks via FLAGS.
    """
    torch.backends.cudnn.benchmark = True  # For acceleration

    # model
    model, model_wrapper = mc.get_model()
    ema = mc.setup_ema(model)
    criterion = torch.nn.CrossEntropyLoss(reduction='mean').cuda()
    criterion_smooth = optim.CrossEntropyLabelSmooth(
        FLAGS.model_kwparams['num_classes'],
        FLAGS['label_smoothing'],
        reduction='mean').cuda()
    # Task-specific loss overrides (segmentation / coco keypoints).
    if model.task == 'segmentation':
        criterion = CrossEntropyLoss().cuda()
        criterion_smooth = CrossEntropyLoss().cuda()
    if FLAGS.dataset == 'coco':
        criterion = JointsMSELoss(use_target_weight=True).cuda()
        criterion_smooth = JointsMSELoss(use_target_weight=True).cuda()

    # Optional early exit: only dump the model graph to the summary writer.
    if FLAGS.get('log_graph_only', False):
        if udist.is_master():
            _input = torch.zeros(1, 3, FLAGS.image_size,
                                 FLAGS.image_size).cuda()
            _input = _input.requires_grad_(True)
            if isinstance(model_wrapper,
                          (torch.nn.DataParallel,
                           udist.AllReduceDistributedDataParallel)):
                mc.summary_writer.add_graph(model_wrapper.module, (_input, ),
                                            verbose=True)
            else:
                mc.summary_writer.add_graph(model_wrapper, (_input, ),
                                            verbose=True)
        return

    # check pretrained
    if FLAGS.pretrained:
        checkpoint = torch.load(FLAGS.pretrained,
                                map_location=lambda storage, loc: storage)
        if ema:
            ema.load_state_dict(checkpoint['ema'])
            ema.to(get_device(model))
        # update keys from external models
        if isinstance(checkpoint, dict) and 'model' in checkpoint:
            checkpoint = checkpoint['model']
        # Remap by position: assumes old/new state dicts enumerate
        # parameters in the same order — TODO confirm for each external model.
        if (hasattr(FLAGS, 'pretrained_model_remap_keys')
                and FLAGS.pretrained_model_remap_keys):
            new_checkpoint = {}
            new_keys = list(model_wrapper.state_dict().keys())
            old_keys = list(checkpoint.keys())
            for key_new, key_old in zip(new_keys, old_keys):
                new_checkpoint[key_new] = checkpoint[key_old]
                if udist.is_master():
                    logging.info('remap {} to {}'.format(key_new, key_old))
            checkpoint = new_checkpoint
        model_wrapper.load_state_dict(checkpoint)
        if udist.is_master():
            logging.info('Loaded model {}.'.format(FLAGS.pretrained))
    optimizer = optim.get_optimizer(model_wrapper, FLAGS)

    # check resume training
    if FLAGS.resume:
        checkpoint = torch.load(os.path.join(FLAGS.resume,
                                             'latest_checkpoint.pt'),
                                map_location=lambda storage, loc: storage)
        # NOTE(review): the checkpoint stores whole pickled objects here
        # (model/optimizer/ema), not state dicts — see the commented
        # load_state_dict calls below.
        model_wrapper = checkpoint['model'].cuda()
        model = model_wrapper.module
        # model = checkpoint['model'].module
        optimizer = checkpoint['optimizer']
        # Move optimizer state tensors back onto the GPU.
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
        # model_wrapper.load_state_dict(checkpoint['model'])
        # optimizer.load_state_dict(checkpoint['optimizer'])
        if ema:
            # ema.load_state_dict(checkpoint['ema'])
            ema = checkpoint['ema'].cuda()
            ema.to(get_device(model))
        last_epoch = checkpoint['last_epoch']
        lr_scheduler = optim.get_lr_scheduler(optimizer,
                                              FLAGS,
                                              last_epoch=(last_epoch + 1) *
                                              FLAGS._steps_per_epoch)
        lr_scheduler.last_epoch = (last_epoch + 1) * FLAGS._steps_per_epoch
        best_val = extract_item(checkpoint['best_val'])
        train_meters, val_meters = checkpoint['meters']
        FLAGS._global_step = (last_epoch + 1) * FLAGS._steps_per_epoch
        if udist.is_master():
            logging.info('Loaded checkpoint {} at epoch {}.'.format(
                FLAGS.resume, last_epoch))
    else:
        # Fresh run: build scheduler/meters from scratch.
        lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS)
        # last_epoch = lr_scheduler.last_epoch
        last_epoch = -1
        best_val = 1.
        if not FLAGS.distill:
            train_meters = mc.get_meters('train', FLAGS.prune_params['method'])
            val_meters = mc.get_meters('val')
        else:
            train_meters = mc.get_distill_meters('train',
                                                 FLAGS.prune_params['method'])
            val_meters = mc.get_distill_meters('val')
        # Segmentation tracks mIoU (higher is better), so best_val starts at 0.
        if FLAGS.model_kwparams.task == 'segmentation':
            best_val = 0.
            if not FLAGS.distill:
                train_meters = mc.get_seg_meters('train',
                                                 FLAGS.prune_params['method'])
                val_meters = mc.get_seg_meters('val')
            else:
                train_meters = mc.get_seg_distill_meters(
                    'train', FLAGS.prune_params['method'])
                val_meters = mc.get_seg_distill_meters('val')
        FLAGS._global_step = 0

    if not FLAGS.resume and udist.is_master():
        logging.info(model_wrapper)
    assert FLAGS.profiling, '`m.macs` is used for calculating penalty'
    # if udist.is_master():
    #     model.apply(lambda m: print(m))
    if FLAGS.profiling:
        if 'gpu' in FLAGS.profiling:
            mc.profiling(model, use_cuda=True)
        if 'cpu' in FLAGS.profiling:
            mc.profiling(model, use_cuda=False)

    # Dataset selection: segmentation/keypoint datasets have dedicated
    # loaders; everything else goes through the generic dataflow path.
    if FLAGS.dataset == 'cityscapes':
        (train_set, val_set,
         test_set) = seg_dataflow.cityscapes_datasets(FLAGS)
        segval = SegVal(num_classes=19)
    elif FLAGS.dataset == 'ade20k':
        (train_set, val_set, test_set) = seg_dataflow.ade20k_datasets(FLAGS)
        segval = SegVal(num_classes=150)
    elif FLAGS.dataset == 'coco':
        (train_set, val_set, test_set) = seg_dataflow.coco_datasets(FLAGS)
        # print(len(train_set), len(val_set))  # 149813 104125
        segval = None
    else:
        # data
        (train_transforms, val_transforms,
         test_transforms) = dataflow.data_transforms(FLAGS)
        (train_set, val_set,
         test_set) = dataflow.dataset(train_transforms, val_transforms,
                                      test_transforms, FLAGS)
        segval = None
    (train_loader, calib_loader, val_loader,
     test_loader) = dataflow.data_loader(train_set, val_set, test_set, FLAGS)

    # get bn's weights
    if FLAGS.prune_params.use_transformer:
        FLAGS._bn_to_prune, FLAGS._bn_to_prune_transformer = prune.get_bn_to_prune(
            model, FLAGS.prune_params)
    else:
        FLAGS._bn_to_prune = prune.get_bn_to_prune(model, FLAGS.prune_params)
    rho_scheduler = prune.get_rho_scheduler(FLAGS.prune_params,
                                            FLAGS._steps_per_epoch)

    if FLAGS.test_only and (test_loader is not None):
        if udist.is_master():
            logging.info('Start testing.')
        test_meters = mc.get_meters('test')
        validate(last_epoch, calib_loader, test_loader, criterion, test_meters,
                 model_wrapper, ema, 'test')
        return

    # already broadcast by AllReduceDistributedDataParallel
    # optimizer load same checkpoint/same initialization

    if udist.is_master():
        logging.info('Start training.')

    for epoch in range(last_epoch + 1, FLAGS.num_epochs):
        # train
        results = run_one_epoch(epoch,
                                train_loader,
                                model_wrapper,
                                criterion_smooth,
                                optimizer,
                                lr_scheduler,
                                ema,
                                rho_scheduler,
                                train_meters,
                                phase='train')

        if (epoch + 1) % FLAGS.eval_interval == 0:
            # val
            results, model_eval_wrapper = validate(epoch, calib_loader,
                                                   val_loader, criterion,
                                                   val_meters, model_wrapper,
                                                   ema, 'val', segval, val_set)

            # Prune-and-shrink step: threshold the bn weights, log how many
            # FLOPs the mask would remove, and shrink the live model when
            # the saving is large enough (or at the final epoch).
            if FLAGS.prune_params['method'] is not None and FLAGS.prune_params[
                    'bn_prune_filter'] is not None:
                prune_threshold = FLAGS.model_shrink_threshold  # 1e-3
                masks = prune.cal_mask_network_slimming_by_threshold(
                    get_prune_weights(model_eval_wrapper), prune_threshold
                )  # get mask for all bn weights (depth-wise)
                FLAGS._bn_to_prune.add_info_list('mask', masks)
                flops_pruned, infos = prune.cal_pruned_flops(
                    FLAGS._bn_to_prune)
                log_pruned_info(mc.unwrap_model(model_eval_wrapper),
                                flops_pruned, infos, prune_threshold)
                if not FLAGS.distill:
                    if flops_pruned >= FLAGS.model_shrink_delta_flops \
                            or epoch == FLAGS.num_epochs - 1:
                        ema_only = (epoch == FLAGS.num_epochs - 1)
                        shrink_model(model_wrapper, ema, optimizer,
                                     FLAGS._bn_to_prune, prune_threshold,
                                     ema_only)
            model_kwparams = mb.output_network(mc.unwrap_model(model_wrapper))

            if udist.is_master():
                # Track the best metric per task: top1 error (lower is
                # better), mIoU (higher), or the raw coco result (higher).
                if FLAGS.model_kwparams.task == 'classification' and results[
                        'top1_error'] < best_val:
                    best_val = results['top1_error']
                    logging.info(
                        'New best validation top1 error: {:.4f}'.format(
                            best_val))

                    save_status(model_wrapper, model_kwparams, optimizer, ema,
                                epoch, best_val, (train_meters, val_meters),
                                os.path.join(FLAGS.log_dir, 'best_model'))

                elif FLAGS.model_kwparams.task == 'segmentation' and FLAGS.dataset != 'coco' and results[
                        'mIoU'] > best_val:
                    best_val = results['mIoU']
                    logging.info('New seg mIoU: {:.4f}'.format(best_val))

                    save_status(model_wrapper, model_kwparams, optimizer, ema,
                                epoch, best_val, (train_meters, val_meters),
                                os.path.join(FLAGS.log_dir, 'best_model'))
                elif FLAGS.dataset == 'coco' and results > best_val:
                    # NOTE(review): for coco, `results` appears to be a bare
                    # scalar rather than a dict — confirm against validate().
                    best_val = results
                    logging.info('New Result: {:.4f}'.format(best_val))
                    save_status(model_wrapper, model_kwparams, optimizer, ema,
                                epoch, best_val, (train_meters, val_meters),
                                os.path.join(FLAGS.log_dir, 'best_model'))

                # save latest checkpoint
                save_status(model_wrapper, model_kwparams, optimizer, ema,
                            epoch, best_val, (train_meters, val_meters),
                            os.path.join(FLAGS.log_dir, 'latest_checkpoint'))

    return
Beispiel #9
0
                                                   args['glove'],
                                                   args['embedding_size'])

# create tester
print("===> creating dataloaders ...")
val_loader = common.get_data_loader(args['model'],
                                    'test',
                                    d_word_index,
                                    args['batch_size'],
                                    args['max_sentence_length'],
                                    pdtb_category=args['pdtb_category'])

# load model,optimizer and loss
model, optimizer, criterion = common.get_model(
    model=args['model'],
    model_path=results_path,
    lr=args['learning_rate'],
    weight_decay=args['weight_decay'],
    pdtb_category=args['pdtb_category'])
print(optimizer)
print(criterion)

if args['cuda']:
    torch.backends.cudnn.enabled = True
    cudnn.benchmark = True
    model.cuda()
    criterion = criterion.cuda()

if args['model'] == 'grn16':
    common.test_grn16(val_loader, model, criterion, args['cuda'],
                      args['print_freq'])
elif args['model'] == 'keann':
Beispiel #10
0
                                    d_word_index,
                                    args['batch_size'],
                                    args['max_sentence_length'],
                                    pdtb_category=args['pdtb_category'])
print('===> dataloader creatin: {t:.3f}'.format(t=time.time() - end))

# create model
print("===> creating rnn model ...")
vocab_size = len(d_word_index)
model, optimizer, criterion = common.get_model(
    model=args['model'],
    model_path=results_path,
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    classes=args['classes'],
    rnn_model=args['rnn'],
    mean_seq=args['mean_seq'],
    hidden_size=args['hidden_size'],
    embed=embed,
    layers=args['layers'],
    lr=args['learning_rate'],
    weight_decay=args['weight_decay'],
    pdtb_category=args['pdtb_category'])
print(model)
print(optimizer)
print(criterion)

if args['cuda']:
    torch.backends.cudnn.enabled = True
    cudnn.benchmark = True
    model.cuda()
    criterion = criterion.cuda()
Beispiel #11
0
    params_name = {
        "early_stopping": cfg.early_stopping,
        "conv_dropout": cfg.conv_dropout,
        "frames": cfg.frames_in_sec,
    }
    params_name.update(args.__dict__)

    base_model_name = get_model_name(params_name)
    # Model
    state = {
        "scaler": scaler.state_dict(),
        "many_hot_encoder": many_hot_encoder.state_dict(),
        "args": vars(args),
    }
    model, state = get_model(state, args)
    optimizer, state = get_optimizer(model, state)
    model = to_cuda_if_available(model)
    LOG.info(model)

    # ##########
    # # Callbacks
    # ##########
    if cfg.save_best:
        save_best_call = SaveBest(val_comp="sup")
    if cfg.early_stopping is not None:
        early_stopping_call = EarlyStopping(patience=cfg.early_stopping,
                                            val_comp="sup")
    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # x, y = next(iter(train_loader))
Beispiel #12
0
                                    d_word_index,
                                    args.batch_size,
                                    args.max_sentence_length,
                                    pdtb_category=args.pdtb_category)
print('===> dataloader creatin: {t:.3f}'.format(t=time.time() - end))

# create model
print("===> creating rnn model ...")
vocab_size = len(d_word_index)
model, optimizer, criterion = common.get_model(
    model=args.model,
    model_path=results_path,
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    classes=args.classes,
    rnn_model=args.rnn,
    mean_seq=args.mean_seq,
    hidden_size=args.hidden_size,
    embed=embed,
    layers=args.layers,
    lr=args.lr,
    weight_decay=args.weight_decay,
    pdtb_category=args.pdtb_category)
print(model)
print(optimizer)
print(criterion)

if args.cuda:
    torch.backends.cudnn.enabled = True
    cudnn.benchmark = True
    model.cuda()
    criterion = criterion.cuda()
Beispiel #13
0
 def execute(cls, args: Args):
     """Evaluate the chosen model on its spec data and print accuracy."""
     spec = _extract_model_spec(args)
     net = get_model(args.model, spec.get_model)
     metrics = net.evaluate(x=spec.xs, y=spec.ys, verbose=0)
     print(
         f'Keras accuracy for the model `{spec.name}` is {metrics[1]}')
Beispiel #14
0
 def execute(cls, args: Args):
     """Build the chosen model and persist it to its canonical file path."""
     spec = _extract_model_spec(args)
     net = get_model(args.model, spec.get_model)
     target = get_model_file_path(model_name=args.model)
     save_model(net, target)
     print(f'Model has been saved to "{target}"')
Beispiel #15
0
from src.data import open_data
import common
import pandas as pd

# bootstrap sample size
n = 20
hatch_threshold = 10

# compute jacobians
training = open_data('training')
training['region'] = common.get_regions(training.y)
tropics = training.isel(y=slice(30,34)).load()
tropics['time_of_day'] = tropics.time % 1
p = tropics.p[0].values

model = common.get_model('NN-All')
samples = list(bootstrap_samples(tropics, n))
jacobians = [get_jacobian(model, sample) for sample in samples]

# make plot
fig, axs = plt.subplots(
    4, 5, figsize=(common.textwidth, common.textwidth-2), sharex=True, sharey=True)
plt.rcParams['hatch.color'] = '0.5'

axs[0,0].invert_yaxis()
axs[0,0].invert_xaxis()
norm = SymLogNorm(1, 2, vmin=-1e5, vmax=1e5)

for ax, jac in zip(axs.flat, jacobians):
    qt_qt = jac['QT']['QT'].detach().numpy()
    im = ax.pcolormesh(p, p, qt_qt, vmin=-2, vmax=2, cmap='RdBu_r', rasterized=True)
Beispiel #16
0
def train_val_test():
    """Train and val.

    Classification-only driver: builds the model and losses, optionally
    loads pretrained weights or resumes from the latest checkpoint, then
    alternates one training epoch with a validation pass, checkpointing
    and logging the best top-1 error to wandb.
    """
    torch.backends.cudnn.benchmark = True

    # model
    model, model_wrapper = mc.get_model()
    ema = mc.setup_ema(model)
    criterion = torch.nn.CrossEntropyLoss(reduction='none').cuda()
    criterion_smooth = optim.CrossEntropyLabelSmooth(
        FLAGS.model_kwparams['num_classes'],
        FLAGS['label_smoothing'],
        reduction='none').cuda()
    # TODO(meijieru): cal loss on all GPUs instead only `cuda:0` when non
    # distributed

    # Optional early exit: only dump the model graph to the summary writer.
    if FLAGS.get('log_graph_only', False):
        if udist.is_master():
            _input = torch.zeros(1, 3, FLAGS.image_size,
                                 FLAGS.image_size).cuda()
            _input = _input.requires_grad_(True)
            mc.summary_writer.add_graph(model_wrapper, (_input, ),
                                        verbose=True)
        return

    # check pretrained
    if FLAGS.pretrained:
        checkpoint = torch.load(FLAGS.pretrained,
                                map_location=lambda storage, loc: storage)
        if ema:
            ema.load_state_dict(checkpoint['ema'])
            ema.to(get_device(model))
        # update keys from external models
        if isinstance(checkpoint, dict) and 'model' in checkpoint:
            checkpoint = checkpoint['model']
        # Remap by position: assumes old/new state dicts enumerate
        # parameters in the same order — TODO confirm for each external model.
        if (hasattr(FLAGS, 'pretrained_model_remap_keys')
                and FLAGS.pretrained_model_remap_keys):
            new_checkpoint = {}
            new_keys = list(model_wrapper.state_dict().keys())
            old_keys = list(checkpoint.keys())
            for key_new, key_old in zip(new_keys, old_keys):
                new_checkpoint[key_new] = checkpoint[key_old]
                logging.info('remap {} to {}'.format(key_new, key_old))
            checkpoint = new_checkpoint
        model_wrapper.load_state_dict(checkpoint)
        logging.info('Loaded model {}.'.format(FLAGS.pretrained))
    optimizer = optim.get_optimizer(model_wrapper, FLAGS)

    # check resume training
    if FLAGS.resume:
        checkpoint = torch.load(os.path.join(FLAGS.resume,
                                             'latest_checkpoint.pt'),
                                map_location=lambda storage, loc: storage)
        model_wrapper.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if ema:
            ema.load_state_dict(checkpoint['ema'])
            ema.to(get_device(model))
        last_epoch = checkpoint['last_epoch']
        lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS)
        # Fast-forward the scheduler/global step to the resumed position.
        lr_scheduler.last_epoch = (last_epoch + 1) * FLAGS._steps_per_epoch
        best_val = extract_item(checkpoint['best_val'])
        train_meters, val_meters = checkpoint['meters']
        FLAGS._global_step = (last_epoch + 1) * FLAGS._steps_per_epoch
        if udist.is_master():
            logging.info('Loaded checkpoint {} at epoch {}.'.format(
                FLAGS.resume, last_epoch))
    else:
        # Fresh run: build scheduler/meters from scratch.
        lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS)
        # last_epoch = lr_scheduler.last_epoch
        last_epoch = -1
        best_val = 1.
        train_meters = mc.get_meters('train')
        val_meters = mc.get_meters('val')
        FLAGS._global_step = 0

    if not FLAGS.resume and udist.is_master():
        logging.info(model_wrapper)
    if FLAGS.profiling:
        if 'gpu' in FLAGS.profiling:
            mc.profiling(model, use_cuda=True)
        if 'cpu' in FLAGS.profiling:
            mc.profiling(model, use_cuda=False)

    # data
    (train_transforms, val_transforms,
     test_transforms) = dataflow.data_transforms(FLAGS)
    (train_set, val_set, test_set) = dataflow.dataset(train_transforms,
                                                      val_transforms,
                                                      test_transforms, FLAGS)
    (train_loader, calib_loader, val_loader,
     test_loader) = dataflow.data_loader(train_set, val_set, test_set, FLAGS)

    if FLAGS.test_only and (test_loader is not None):
        if udist.is_master():
            logging.info('Start testing.')
        test_meters = mc.get_meters('test')
        validate(last_epoch, calib_loader, test_loader, criterion, test_meters,
                 model_wrapper, ema, 'test')
        return

    # already broadcast by AllReduceDistributedDataParallel
    # optimizer load same checkpoint/same initialization

    if udist.is_master():
        logging.info('Start training.')

    for epoch in range(last_epoch + 1, FLAGS.num_epochs):
        # train
        results = run_one_epoch(epoch,
                                train_loader,
                                model_wrapper,
                                criterion_smooth,
                                optimizer,
                                lr_scheduler,
                                ema,
                                train_meters,
                                phase='train')

        # val
        results = validate(epoch, calib_loader, val_loader, criterion,
                           val_meters, model_wrapper, ema, 'val')
        # Lower top-1 error is better.
        if results['top1_error'] < best_val:
            best_val = results['top1_error']

            if udist.is_master():
                save_status(model_wrapper, optimizer, ema, epoch, best_val,
                            (train_meters, val_meters),
                            os.path.join(FLAGS.log_dir, 'best_model.pt'))
                logging.info(
                    'New best validation top1 error: {:.4f}'.format(best_val))
        if udist.is_master():
            # save latest checkpoint
            save_status(model_wrapper, optimizer, ema, epoch, best_val,
                        (train_meters, val_meters),
                        os.path.join(FLAGS.log_dir, 'latest_checkpoint.pt'))

        wandb.log(
            {
                "Validation Accuracy": 1. - results['top1_error'],
                "Best Validation Accuracy": 1. - best_val
            },
            step=epoch)


# NOTE(meijieru): from scheduler code, should be called after train/val
# use stepwise scheduler instead
# lr_scheduler.step()
    return
Beispiel #17
0
parser.add_argument('--pdtb-category', default='Comparison',
                    choices=['Comparison', 'Contingency', 'Temporal', 'Expansion', ''],
                    help='PDTB category')
args = parser.parse_args()

d_word_index, results_path = common.get_word_index(args.model, args.glove, args.embedding_size)

# create tester
print("===> creating dataloaders ...")
val_loader = common.get_data_loader(args.model, 'test', d_word_index, args.batch_size, args.max_sentence_length,
                                    pdtb_category=args.pdtb_category)

# load model,optimizer and loss
model, optimizer, criterion = common.get_model(model=args.model,
                                               model_path=results_path,
                                               lr=args.lr,
                                               weight_decay=args.weight_decay,
                                               pdtb_category=args.pdtb_category)
print(optimizer)
print(criterion)

if args.cuda:
    torch.backends.cudnn.enabled = True
    cudnn.benchmark = True
    model.cuda()
    criterion = criterion.cuda()

if args.model == 'grn16':
    common.test_grn16(val_loader, model, criterion, args.cuda, args.print_freq)
elif args.model == 'keann':
    common.test_keann(val_loader, model, criterion, args.cuda, args.print_freq)
Beispiel #18
0
    train_set_emb = DataLoadDf(train_weak_df,
                               many_hot_encoder.encode_weak,
                               transform=Compose(trans_emb))
    valid_set_val = DataLoadDf(valid_weak_df,
                               many_hot_encoder.encode_weak,
                               transform=Compose(trans_emb))
    test_set_val = DataLoadDf(test_df,
                              many_hot_encoder.encode_weak,
                              transform=Compose(trans_emb))

    emb_state = {
        "scaler": scaler.state_dict(),
        "many_hot_encoder": many_hot_encoder.state_dict()
    }
    emb_model, emb_state = get_model(emb_state, args)
    emb_model = to_cuda_if_available(emb_model)
    # Classif_model
    if args.segment:
        X, y = train_set[0]
    else:
        X, y = next(iter(train_load))
    X = to_cuda_if_available(X)
    emb = emb_model(X)
    LOG.info("shape input CNN: x {}, y {}".format(X.shape, y.shape))
    LOG.info("shape after CNN: {}".format(emb.shape))

    if args.n_layers_classif == 2:
        dimensions = [32, 16]
    elif args.n_layers_classif == 1:
        dimensions = [32]