def sub_2(cfg, model, weight_path):
    utils.load_model(weight_path, model)
    result_all = {}
    model = model.eval()
    dataset_sub = RsnaDatasetTest2()
    dataloader = DataLoader(dataset_sub,
                            batch_size=1,
                            shuffle=False,
                            num_workers=3,
                            collate_fn=lambda x: x)
    for (item) in tqdm(dataloader):
        imgs, study_id, sop_arr = item[0]
        _bs = 128
        preds = []
        for i in np.arange(0, len(sop_arr), step=_bs):
            _imgs = torch.from_numpy(imgs[i:i + _bs]).cuda()
            with torch.no_grad():
                outputs = model(_imgs)
            res = torch.sigmoid(outputs["pe_present_on_image"]).cpu().numpy()
            preds.extend(res)
        result_all[study_id] = {
            "outputs": np.array(preds),
            "ids": np.array(sop_arr),
        }
        if args.debug: break

    print("per study result's keys(): ", result_all[study_id].keys())
    # import pdb; pdb.set_trace()
    return result_all
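
A hypothetical call site for sub_2, assuming cfg, model and the weight path are prepared by the surrounding pipeline (the file name below is illustrative only):

    result_all = sub_2(cfg, model, "weights/fold0_best.pt")
    study_id = next(iter(result_all))
    probs = result_all[study_id]["outputs"]   # per-slice sigmoid probabilities (np.ndarray)
    sop_ids = result_all[study_id]["ids"]     # SOPInstanceUIDs aligned with probs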
Example #2
def main():
  args = parser.parse_args()
  args, logging, writer = utils.parse_args(args)
  
  logging.info('# Start Re-training #')
  
  criterion = LOSS_FACTORY[args.task](args, args.loss_scaling)

  if args.model_type == "pointwise":
    model_temp = POINTWISE_FACTORY[args.model]
  else:
    raise NotImplementedError("Other models have not been implemented!")
  model = model_temp(args.input_size, args.output_size, args.layers, args.activation, args)

  logging.info('## Model created: ##')
  logging.info(model.__repr__())
    
  logging.info("### Param size = %f MB, Total number of params = %d ###" %
              utils.count_parameters_in_MB(model, args))

  logging.info('### Loading model to parallel GPUs ###')
 
  utils.profile(model, args, logging)
  model = utils.model_to_gpus(model, args)
  
  logging.info('### Preparing schedulers and optimizers ###')
  optimizer = torch.optim.Adam(
      model.parameters(),
      args.learning_rate,
      weight_decay = args.weight_decay)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
      optimizer, args.epochs)

  logging.info('## Downloading and preparing data ##')
  train_loader, valid_loader= get_train_loaders(args)
  
  logging.info('## Beginning Training ##')

  train = Trainer(model, criterion, optimizer, scheduler, args)

  best_error, train_time, val_time = train.train_loop(
      train_loader, valid_loader, logging, writer)

  logging.info('## Finished training, the best observed validation error: {}, total training time: {}, total validation time: {} ##'.format(
      best_error, timedelta(seconds=train_time), timedelta(seconds=val_time)))

  logging.info('## Beginning Plotting ##')
  del model 

  with torch.no_grad():
    model = model_temp(args.input_size, args.output_size, args.layers, args.activation, args)
    utils.load_model(model, args.save+"/weights.pt")
    model = utils.model_to_gpus(model, args)

    model.eval()
    plot_regression_uncertainty(model, PLT, train_loader, args)

    logging.info('# Finished #')
Example #3
    def _load_ensemble(self, input_size, output_size, layers, activation):
        sample_names = []
        for root, dirs, files in os.walk(self.args.save):
            for filename in files:
                if ".pt" in filename:
                    sample_name = re.findall('weights_[0-9]*.pt', filename)
                    if len(sample_name) >= 1:
                        sample_name = sample_name[0]
                        sample_names.append(sample_name)
        sample_names.sort(key=natural_keys)
        sample_names = sample_names[:self.args.samples]
        for i in range(self.args.samples):
            model = _LinearNetwork(input_size, output_size, layers, activation,
                                   self.args)
            load_model(model, self.args.model_path + "/" + sample_names[i])
            self.ensemble.append(model)
def get_model_paths(noise_type,
                    noise_level,
                    exp_num,
                    init_index,
                    start_epoch=0,
                    end_epoch=None,
                    step=1,
                    points=None,
                    root_dir="../results"):
    """Return model paths of specifed experiment"""
    model_dict = {noise_type: {noise_level: {exp_num: {init_index: {}}}}}
    result_path = root_dir
    models = load_model(model_dict, result_path)
    model_paths = models[noise_type][noise_level][exp_num][init_index]

    last_epoch = int(
        model_paths[-1].split("/")[-1].split(".")[0].split("_")[-1])

    if points is not None:
        step = int(last_epoch / points)
        if step == 0:
            step = 1

    if end_epoch is not None:
        model_paths = models[noise_type][noise_level][exp_num][init_index][
            start_epoch:end_epoch:step]
    else:
        model_paths = models[noise_type][noise_level][exp_num][init_index][
            start_epoch::step]

    return model_paths
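
A hypothetical usage sketch of get_model_paths; the argument values are illustrative and depend on how the results directory is organized:

    paths = get_model_paths(noise_type="label_noise",
                            noise_level=0.2,
                            exp_num=0,
                            init_index=1,
                            points=20)   # subsample roughly 20 checkpoints
    for path in paths:
        print(path)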
Example #5
def train_model(dataloaders,
                device,
                model,
                criterion,
                optimizer,
                state_path,
                model_name,
                scheduler=None,
                num_epochs=25,
                continue_train=True,
                arcface=False):
    def create_thread(q, dataloader):
        tr = Thread(target=insertData, args=(q, dataloader))  # start inserting
        tr.daemon = True
        tr.start()

    if continue_train and os.path.exists(state_path):
        with open(state_path, 'rb') as f:
            state_dict = pickle.load(f)
        print(state_dict)
        train_loss = state_dict['loss']
        val_loss = state_dict['val_losses']
        accuracy = state_dict['accuracy']
        start = state_dict['epoch']
        model = load_model(model, model_name, start)
        start += 1
        scheduler.load_state_dict(
            torch.load(os.path.join(f'models/{model_name}.scheduler')))
        print(scheduler.state_dict())
    else:
        train_loss, val_loss, accuracy = [], [], []
        start = 0

    for epoch in tqdm(range(start, num_epochs)):
        train_loss.append(
            train_step(dataloaders, device, model, criterion, optimizer,
                       arcface).cpu())
        if scheduler is not None:
            scheduler.step()
        cur_val_loss, cur_acc = eval_step(dataloaders, device, model,
                                          criterion, arcface)
        val_loss.append(cur_val_loss.cpu())
        accuracy.append(cur_acc)
        print(f'Accuracy is {cur_acc}')

        with open(state_path, 'wb') as f:
            pickle.dump(
                {
                    'loss': train_loss,
                    'val_losses': val_loss,
                    'epoch': epoch,
                    'accuracy': accuracy
                }, f)
        torch.save(model.state_dict(),
                   os.path.join(f'models/{model_name}{epoch}.data'))
        if scheduler is not None:
            torch.save(scheduler.state_dict(),
                       os.path.join(f'models/{model_name}.scheduler'))
    return train_loss, val_loss
Example #6
def test_load():
    train, dev, test, word_idx = read_sst(
        u"C:\\Users\\roger\\NLP\\Corpus\\sst_bi\\sst.bi.train",
        u"C:\\Users\\roger\\NLP\\Corpus\\sst_bi\\sst.bi.dev",
        u"C:\\Users\\roger\\NLP\\Corpus\\sst_bi\\sst.bi.test",
    )
    classifier = load_model("cnn_model")
    acc, pred = classifier.test(test[0], test[1])
    print(acc)
    return pred
def sub(cfg, model, weight_path):
    utils.load_model(weight_path, model)
    model = model.eval()
    dataset_sub = RsnaDatasetTest()

    df_test = pd.read_csv(DATADIR / "test.csv")
    result_all = {}
    for study in tqdm(df_test.StudyInstanceUID.unique()):
        dataset_sub.set_StudyInstanceUID(study)
        loader_sub = DataLoader(dataset_sub,
                                batch_size=32,
                                shuffle=False,
                                pin_memory=True,
                                num_workers=6)
        with torch.no_grad():
            result = run_nn(cfg, 'test', model, loader_sub)
            result_all[study] = result

    print("per study result's keys(): ", result_all[study].keys())
    return result_all
Example #8
def main(result, *masks):
    models = reduce(lambda x, y: x + y, map(glob, masks))
    weights = [get_weights(x) for x in tqdm(models, desc='loading')]

    model = load_model(models[0])

    for i, _ in enumerate(tqdm(model.layers, desc='averaging')):
        w = [x[i] for x in weights]
        w = average(w)
        model.layers[i].set_weights(w)

    model.save(result)
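
The helpers get_weights and average are not shown in this example. A minimal sketch of what they could look like, assuming Keras models (an assumption based on the model.layers[i].set_weights call above):

    import numpy as np
    from tensorflow import keras

    def get_weights(path):
        # Load a model once and return its per-layer weight lists.
        model = keras.models.load_model(path)
        return [layer.get_weights() for layer in model.layers]

    def average(per_model_weights):
        # per_model_weights holds one entry per model, each a list of
        # np.ndarrays for the same layer; average them array by array.
        return [np.mean(arrays, axis=0) for arrays in zip(*per_model_weights)]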
Example #9
def valid(_cfg, model, all_exam=False):
    cfg = copy.deepcopy(_cfg)
    if all_exam:
        cfg["dataset"]["param"][
            "posexam_only"] = False  # validation for all slices
    assert cfg["output"]
    assert not os.path.exists(cfg["output"])
    criterion = factory.get_criterion(cfg)

    path = os.path.join(output_dir, 'fold%d_ep0.pt' % (cfg['fold']))
    print(f'best path: {str(path)}')
    utils.load_model(str(path), model)

    loader_valid = factory.get_loader_valid(cfg)
    with torch.no_grad():
        results = run_nn(cfg,
                         'valid',
                         model,
                         loader_valid,
                         criterion=criterion)
    utils.save_pickle(results, cfg["output"])
    log('saved to %s' % cfg["output"])
def sub_3(cfg, model, weight_path):
    """
    Returns: 
    result_all["study_id"] -> {
        "outputs" -> {"col_name1" -> np.ndarray, "col_name2 -> np.ndarray}
        "ids -> sop_id_arr
    }
    """
    utils.load_model(weight_path, model)
    result_all = {}
    model = model.eval()
    dataset_sub = RsnaDatasetTest2()
    dataloader = DataLoader(dataset_sub,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            collate_fn=lambda x: x)
    for (item) in tqdm(dataloader):
        imgs, study_id, sop_arr = item[0]
        _bs = 64
        outputs_all = defaultdict(list)
        for i in np.arange(0, len(sop_arr), step=_bs):
            _imgs = torch.from_numpy(imgs[i:i + _bs]).cuda()
            with torch.no_grad():
                outputs = model(_imgs)
            for _k in outputs.keys():  # iterate over output heads
                outputs_all[_k].extend(
                    torch.sigmoid(outputs[_k]).cpu().numpy()
                )  # currently every output is a binary logit
        result_all[study_id] = {
            "outputs":
            dict([(k, np.array(v)) for k, v in outputs_all.items()]),
            "ids": np.array(sop_arr),
        }
        if args.debug: break

    print("per study result's keys(): ", result_all[study_id].keys())
    return result_all
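
A hypothetical consumer of the structure documented in the docstring above, flattening the per-study predictions into rows (the column names are illustrative):

    rows = []
    for study_id, res in result_all.items():
        for col_name, probs in res["outputs"].items():
            for sop_id, p in zip(res["ids"], probs):
                rows.append({"StudyInstanceUID": study_id,
                             "SOPInstanceUID": sop_id,
                             "label": col_name,
                             "probability": float(p)})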
Example #11
    def __init__(self, config):
        """
        Initialize this classifier with the given configuration
        Input:
            config: a YAML node with the classifier configuration
        """
        self.__clf_name = config["classifier"]
        clf_config = config[self.__clf_name]
        self.__pickle = clf_config["pickle"]
        self.__clf = utils.load_model(self.__pickle)

        if self.__clf is not None:
            return

        if self.__clf_name == "adaboost":
            self.__clf = self.__init_adaboost(clf_config)
Example #12
    def test_save_and_load_checkpoint(self):
        model = torchvision.models.resnet18(pretrained=False)
        utils.save_checkpoint(model,
                              epoch=100,
                              filename='tmp.pth',
                              save_arch=True)

        loaded_model = utils.load_model('tmp.pth')

        torch.testing.assert_allclose(model.conv1.weight,
                                      loaded_model.conv1.weight)

        model.conv1.weight = nn.Parameter(torch.zeros_like(model.conv1.weight))
        model = utils.load_checkpoint('tmp.pth', model=model)['model']

        assert (model.conv1.weight != 0).any()
Example #13
def test_all_models(test_one_model, model_dict, model_filenames, data_loader,
                    administrator):
    for i, filename in enumerate(model_filenames):
        logging.info("\n")
        model, _ = load_model(model_dict, filename)
        logging.info("Loaded {}. Now testing".format(filename))

        administrator.set_model(model)

        t_valid = time.time()
        logdict = test_one_model(model, data_loader)
        logdict['model'] = filename
        logging.info("Testing took {:.1f} seconds".format(time.time() -
                                                          t_valid))

        #t_log = time.time()
        administrator.log(**logdict)
Example #14
def predict_export(params):

    predictions = {}
    error_class = []

    for class_name in ['arousal', 'valence', 'topic']:

        if class_name in ['arousal', 'valence']:
            class_no = 3
        else:
            class_no = 10

        trained_model_path = os.path.join(
            'experiments/pretrained_model/', class_name,
            args.experiment_name + '_' + data_file_name())
        try:
            model = load_model(path=trained_model_path,
                               name=args.experiment_name)
            loader, metadata = load_data_pipeline(params, class_name)
            predictions['prediction_' + class_name] = predict(
                params, model, loader, args.predict_partition, class_name)
            torch.cuda.empty_cache()
        except FileNotFoundError as fnfe:
            print("Model not found: " + str(fnfe))
            print("[WARN!] Set all prediction values for this model to 0")
            error_class.append(class_name)
            continue

    predictions['id'] = metadata['id']  #[args.predict_partition]
    predictions['segment_id'] = metadata['segment_id']

    df = pd.DataFrame.from_dict(predictions)  # , orient='index' .T
    if len(error_class) > 0:
        for ec in error_class:
            df['prediction_' + ec] = np.nan
        df = df.fillna(0)

    header_names = [
        'id', 'segment_id', 'prediction_arousal', 'prediction_valence',
        'prediction_topic'
    ]
    predict_partition = args.predict_partition.replace('valid', 'devel')
    df[header_names].to_csv(output_path + predict_partition + '.csv',
                            header=header_names,
                            index=False)
Example #15
def main():
    argparser = argparse.ArgumentParser(description=__doc__)
    argparser.add_argument('--gpuid', default='0,')
    argparser.add_argument('--arch', default='fnn')
    argparser.add_argument('--loss', default='mse')
    argparser.add_argument('--dataset', default='fashionmnist')
    argparser.add_argument('--load_size', type=int, default=1000)
    argparser.add_argument('--num_wrong_samples', type=int, default=0)
    argparser.add_argument('--num_clean_samples', type=int, default=1000)
    argparser.add_argument('--batch_size', type=int, default=1000)
    argparser.add_argument('--nclasses', type=int, default=2)

    argparser.add_argument('--task', default='loss')
    argparser.add_argument('--dir', default='')
    argparser.add_argument('--save_dir', default='')

    args = argparser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpuid
    print(args)

    if args.loss == 'mse':
        ct = torch.nn.MSELoss().cuda()
        one_hot = True
    elif args.loss == 'hinge':
        ct = torch.nn.MultiMarginLoss(p=2)
        one_hot = False
    elif args.loss == 'cross_entropy':
        ct = torch.nn.CrossEntropyLoss()
        one_hot = False

    trDL, teDL = load_data(args, stop=True, one_hot=one_hot)
    net = load_model(args.dataset, args.arch)

    if args.task == 'loss':
        compute_loss(net, ct, trDL, teDL, args)
    elif args.task == 'nonuniformity':
        compute_diversity(net, ct, trDL, args)
    elif args.task == 'weight_norm':
        compute_norm(net, args)
    elif args.task == 'sharpness':
        compute_sharpness(net, ct, trDL, args)
    else:
        raise ValueError('Task %s has not been implemented' % args.task)
Example #16
def builtin_train(args):
    # 1. load dataset and model
    (train_images, train_labels), (test_images,
                                   test_labels) = load_dataset(args.data)
    input_shape = train_images[:args.batch_size, :, :, :].shape
    output_size = max(train_labels) + 1
    model = load_model(args.arch,
                       input_shape=input_shape,
                       output_size=output_size)
    model.summary()

    # 2. set tensorboard configs
    logdir = os.path.join(args.logdir, get_current_time())
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

    # 3. loss, optimizer, metrics setting
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    # 4. dataset config
    buffer_size = len(train_images)
    train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    train_ds = train_ds.shuffle(buffer_size)
    if args.augmentation:
        train_ds = train_ds.map(augment)
    train_ds = train_ds.batch(args.batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    test_ds = test_ds.batch(args.batch_size)

    fit_params = {}
    # batch_size is not passed to fit() here: the tf.data datasets above are
    # already batched, and Keras rejects batch_size for dataset inputs.
    fit_params["epochs"] = args.max_epoch
    if args.steps_per_epoch:
        fit_params["steps_per_epoch"] = args.steps_per_epoch
    fit_params["verbose"] = 1
    fit_params["shuffle"] = True
    fit_params["callbacks"] = [tensorboard_callback]
    fit_params["validation_data"] = test_ds

    # 5. start train and test
    model.fit(train_ds, **fit_params)
Example #17
def main(_run):
    args = argparse.Namespace(**_run.config)

    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    # Load test data
    test_loader = load_data(args)
    ex.info["test_size"] = len(test_loader.dataset)

    # Load model
    model_bold = load_model_bold(args)
    model = load_model(args, model_bold)
    model = model.to(args.device)
    model.eval()

    # Define a Loss function and optimizer
    criterion = nn.CrossEntropyLoss()

    correct = 0
    total = 0
    running_loss = 0.
    with torch.no_grad():
        for _, data in enumerate(test_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(args.device), data[1].to(args.device)

            # predict labels
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    loss = running_loss / len(test_loader)
    ex.info["accuracy"] = accuracy
    ex.info["loss"] = loss
    print("Testing the network on the {} test images".format(
        ex.info["test_size"]))
    print("Test Accuracy: {}".format(accuracy))
    print("Test Loss: {}".format(loss))
Example #18
def train_model(dataloaders,
                device,
                model,
                criterion,
                optimizer,
                state_path,
                model_name,
                scheduler=None,
                num_epochs=25,
                continue_train=False):
    if continue_train and os.path.exists(state_path):
        with open(state_path, 'rb') as f:
            state_dict = pickle.load(f)
        print(state_dict)
        train_loss = state_dict['loss']
        val_loss = state_dict['val_losses']
        accuracy = state_dict['accuracy']
        start = state_dict['epoch']
        model = load_model(model, model_name, start)
        start += 1
    else:
        train_loss, val_loss, accuracy = [], [], []
        start = 0

    for epoch in tqdm(range(start, num_epochs)):
        train_loss.append(
            train_step(dataloaders, device, model, criterion, optimizer).cpu())
        cur_val_loss, cur_acc = eval_step(dataloaders, device, model)
        val_loss.append(cur_val_loss.cpu())
        accuracy.append(cur_acc)
        print(f'Accuracy is {cur_acc}')

        with open(state_path, 'wb') as f:
            pickle.dump(
                {
                    'loss': train_loss,
                    'val_losses': val_loss,
                    'epoch': epoch,
                    'accuracy': accuracy
                }, f)
        torch.save(model.state_dict(),
                   os.path.join(f'models/{model_name}{epoch}.data'))
    return train_loss, val_loss
def text_classify_predict_main(model_name,
                               predict_file,
                               output_file=None,
                               seg=True,
                               encoding='utf8',
                               language='english'):
    sys.stdout.write("Load model ...\n")
    model = load_model(model_name)
    sys.stdout.write("Loaded model from %s\n" % model_name)
    assert type(model) == TextClassifier
    texts = list()
    if seg:
        word_segmentor = ChineseWordSegmentor('ictclas')
    else:
        word_segmentor = None
    sys.stdout.write("Loaded Data from %s ...\n" % predict_file)
    with open(predict_file) as fin:
        for line in fin:
            line = line.decode(encoding=encoding).strip()
            if seg:
                token = ' '.join(word_segmentor.segment(line))
            else:
                token = line
            token = generate_sentence_token(token,
                                            max_len=MAX_LEN,
                                            remove_stop=REMOVE_STOP,
                                            low_case=LOW_CASE,
                                            language=language)
            texts.append(token)
    sys.stdout.write("Predict Data ...\n")
    prob_result = [model.predict_text_prob(' '.join(token)) for token in texts]
    if output_file is None:
        out = sys.stdout
    else:
        sys.stdout.write("Save Result to %s \n" % output_file)
        out = open(output_file, 'w')
    for prob in prob_result:
        write_str = model.prob_to_str(prob[0]) + '\n'
        out.write(write_str.encode('utf8'))
    if output_file is not None:
        out.close()
Example #20
def main():
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpuid
    trDL, teDL = load_data(args, stop=True, one_hot=True)
    net = load_model(args.dataset,
                     args.arch,
                     width=args.width,
                     depth=args.depth)
    ct = torch.nn.MSELoss()
    print('# of parameters', num_parameters(net))

    res = scan(net,
               ct,
               trDL,
               teDL,
               args.model,
               verbose=True,
               niters=50,
               nonuniformity=args.nonuniformity)
    with open(args.save_res, 'wb') as f:
        pickle.dump(res, f)
Example #21
def train():
    """Train function."""

    args.outputs_dir = params['save_model_path']

    if args.group_size > 1:
        init()
        context.set_auto_parallel_context(
            device_num=get_group_size(),
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True)
        args.outputs_dir = os.path.join(args.outputs_dir,
                                        "ckpt_{}/".format(str(get_rank())))
        args.rank = get_rank()
    else:
        args.outputs_dir = os.path.join(args.outputs_dir, "ckpt_0/")
        args.rank = 0

    if args.group_size > 1:
        args.max_epoch = params["max_epoch_train_NP"]
        args.loss_scale = params['loss_scale'] / 2
        args.lr_steps = list(map(int, params["lr_steps_NP"].split(',')))
        params['train_type'] = params['train_type_NP']
        params['optimizer'] = params['optimizer_NP']
        params['group_params'] = params['group_params_NP']
    else:
        args.max_epoch = params["max_epoch_train"]
        args.loss_scale = params['loss_scale']
        args.lr_steps = list(map(int, params["lr_steps"].split(',')))

    # create network
    print('start create network')
    criterion = openpose_loss()
    criterion.add_flags_recursive(fp32=True)
    network = OpenPoseNet(vggpath=params['vgg_path'],
                          vgg_with_bn=params['vgg_with_bn'])
    if params["load_pretrain"]:
        print("load pretrain model:", params["pretrained_model_path"])
        load_model(network, params["pretrained_model_path"])
    train_net = BuildTrainNetwork(network, criterion)

    # create dataset
    if os.path.exists(args.jsonpath_train) and os.path.exists(args.imgpath_train) \
            and os.path.exists(args.maskpath_train):
        print('start create dataset')
    else:
        print('Error: wrong data path')
        return 0

    num_worker = 20 if args.group_size > 1 else 48
    de_dataset_train = create_dataset(args.jsonpath_train,
                                      args.imgpath_train,
                                      args.maskpath_train,
                                      batch_size=params['batch_size'],
                                      rank=args.rank,
                                      group_size=args.group_size,
                                      num_worker=num_worker,
                                      multiprocessing=True,
                                      shuffle=True,
                                      repeat_num=1)
    steps_per_epoch = de_dataset_train.get_dataset_size()
    print("steps_per_epoch: ", steps_per_epoch)

    # lr scheduler
    lr_stage, lr_base, lr_vgg = get_lr(params['lr'] * args.group_size,
                                       params['lr_gamma'],
                                       steps_per_epoch,
                                       args.max_epoch,
                                       args.lr_steps,
                                       args.group_size,
                                       lr_type=params['lr_type'],
                                       warmup_epoch=params['warmup_epoch'])

    # optimizer
    if params['group_params']:
        vgg19_base_params = list(
            filter(lambda x: 'base.vgg_base' in x.name,
                   train_net.trainable_params()))
        base_params = list(
            filter(lambda x: 'base.conv' in x.name,
                   train_net.trainable_params()))
        stages_params = list(
            filter(lambda x: 'base' not in x.name,
                   train_net.trainable_params()))

        group_params = [{
            'params': vgg19_base_params,
            'lr': lr_vgg
        }, {
            'params': base_params,
            'lr': lr_base
        }, {
            'params': stages_params,
            'lr': lr_stage
        }]

        if params['optimizer'] == "Momentum":
            opt = Momentum(group_params, learning_rate=lr_stage, momentum=0.9)
        elif params['optimizer'] == "Adam":
            opt = Adam(group_params)
        else:
            raise ValueError("optimizer not support.")
    else:
        if params['optimizer'] == "Momentum":
            opt = Momentum(train_net.trainable_params(),
                           learning_rate=lr_stage,
                           momentum=0.9)
        elif params['optimizer'] == "Adam":
            opt = Adam(train_net.trainable_params(), learning_rate=lr_stage)
        else:
            raise ValueError("optimizer not support.")

    # callback
    config_ck = CheckpointConfig(
        save_checkpoint_steps=params['ckpt_interval'],
        keep_checkpoint_max=params["keep_checkpoint_max"])
    ckpoint_cb = ModelCheckpoint(prefix='{}'.format(args.rank),
                                 directory=args.outputs_dir,
                                 config=config_ck)
    time_cb = TimeMonitor(data_size=de_dataset_train.get_dataset_size())
    if args.rank == 0:
        callback_list = [MyLossMonitor(), time_cb, ckpoint_cb]
    else:
        callback_list = [MyLossMonitor(), time_cb]

    # train
    if params['train_type'] == 'clip_grad':
        train_net = TrainOneStepWithClipGradientCell(train_net,
                                                     opt,
                                                     sens=args.loss_scale)
        train_net.set_train()
        model = Model(train_net)
    elif params['train_type'] == 'fix_loss_scale':
        loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                                   drop_overflow_update=False)
        train_net.set_train()
        model = Model(train_net,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager)
    else:
        raise ValueError("Type {} is not support.".format(
            params['train_type']))

    print("============== Starting Training ==============")
    model.train(args.max_epoch,
                de_dataset_train,
                callbacks=callback_list,
                dataset_sink_mode=False)
    return 0
Example #22
#########################################################################
# Prepare model
#########################################################################
student_config = BertConfig(os.path.join(args.bert_model, 'bert_config.json'))
if args.kd_model.lower() in ['kd', 'kd.cls']:
    logger.info('using normal Knowledge Distillation')
    output_all_layers = args.kd_model.lower() == 'kd.cls'
    student_encoder, student_classifier = init_model(
        task_name, output_all_layers, args.student_hidden_layers,
        student_config)

    n_student_layer = len(student_encoder.bert.encoder.layer)
    student_encoder = load_model(student_encoder,
                                 args.encoder_checkpoint,
                                 args,
                                 'student',
                                 verbose=True)
    logger.info('*' * 77)
    student_classifier = load_model(student_classifier,
                                    args.cls_checkpoint,
                                    args,
                                    'classifier',
                                    verbose=True)
elif args.kd_model.lower() == 'kd.full':
    logger.info('using FULL Knowledge Distillation')
    layer_idx = [int(i) for i in args.fc_layer_idx.split(',')]
    num_fc_layer = len(layer_idx)
    if args.weights is None or args.weights.lower() in ['none']:
        weights = np.array([1] * (num_fc_layer - 1) +
                           [num_fc_layer - 1]) / 2 / (num_fc_layer - 1)
Example #23
    def initialize(self):
        self.net = utils.load_model()

                punctuation = punctuation_reverse_map[punctuation_index]

                if punctuation == " ":
                    output_file.write("%s%s" % (punctuation, word))
                else:
                    if write_readable_text:
                        output_file.write("%s %s" % (punctuation[:1], word))
                    else:
                        output_file.write(" %s %s" % (punctuation, word))
    
if __name__ == "__main__":
    
    if len(sys.argv) > 4:
        model_name = sys.argv[1]    
        net = utils.load_model(model_name)
        net.batch_size = 1
        net.reset_state()
        punctuation_reverse_map = utils.get_reverse_map(net.out_vocabulary)
        
        write_readable_text = bool(int(sys.argv[2]))        
        text_has_pause_duration_tags = bool(int(sys.argv[3]))

        output_file_path = sys.argv[4]

        if len(sys.argv) > 5:
            with open(sys.argv[5], 'r') as unpunctuated_file:
                unpunctuated_text = " ".join(unpunctuated_file.readlines())
        else:
            unpunctuated_text = " ".join(sys.stdin.readlines())
        
                       batch_size=args.batch_size,
                       shuffle=False,
                       num_workers=4)

for data, label in te_loader:
    data, label = tensor2cuda(data), tensor2cuda(label)

    break

adv_list = []
pred_list = []

with torch.no_grad():
    model = WideResNet(depth=34, num_classes=10, widen_factor=10, dropRate=0.0)

    load_model(model, args.load_checkpoint)

    if torch.cuda.is_available():
        model.cuda()

    attack = FastGradientSignUntargeted(model,
                                        max_epsilon,
                                        args.alpha,
                                        min_val=0,
                                        max_val=1,
                                        max_iters=args.k,
                                        _type=perturbation_type)

    adv_data = attack.perturb(data, label, 'mean', False)

    output = model(adv_data, _eval=True)
Example #26
elif args.loss == 'hinge':
    ct = torch.nn.MultiMarginLoss(p=2)
    one_hot = False
elif args.loss == 'cross_entropy':
    print('Loss')
    ct = torch.nn.CrossEntropyLoss()
    one_hot = False


# Load data and model
for lr, n_iters in zip(args.lr, args.n_iters):
    for i in range(args.n_tries):
        print('==== Start of %d-th Experiment ===' % (i + 1))

        trDL, teDL = load_data(args, one_hot=one_hot)
        net = load_model(args.dataset, args.arch)
        # net.apply(lambda t: weights_init(t, args.gain, args.init))

        optimizer = torch.optim.LBFGS(net.parameters(), lr=lr)
        trainer = Trainer(iter_display=args.iter_display)
        trainer.set_model(net, ct, optimizer, scheduler=None)
        res = trainer.train_sgd(trDL,
                                batch_size=args.batch_size,
                                iter_start=1,
                                iter_end=n_iters,
                                tol=args.tol)

        trDL.reset()
        trL, trA, trC = eval_accuracy(net, ct, trDL)
Example #27
            with open(TEST_LIST_PATH, "a") as test_list_file:
                [noise_type, noise_level,
                 hyperparam_index], init_index = keys, key
                test_list_file.write(
                    "{dict_index} {noise_type} {noise_level} {hyperparam_index} {init_index}\n"
                    .format(dict_index=dict_index,
                            noise_type=noise_type,
                            noise_level=noise_level,
                            hyperparam_index=hyperparam_index,
                            init_index=init_index))
        else:
            raise ValueError(
                "The dictionary provided to the write_final_epoch_path_to_file function was not in the correct format."
            )


if __name__ == "__main__":
    root_dir = '../results/mnist'
    experiment_dicts = get_experiment_dicts(root_dir)

    paths_per_experiment_dict = []
    for experiment_dict in experiment_dicts:
        model_paths = load_model(experiment_dict, path_to_results=root_dir)
        paths_per_experiment_dict.append(model_paths)

    os.makedirs(TEST_LIST_DIRECTORY, exist_ok=True)
    open(TEST_LIST_PATH, "w").close()
    np.savez_compressed(TEST_DICT_PATH, data=paths_per_experiment_dict)

    for index, dictionary in enumerate(paths_per_experiment_dict):
        write_details_to_file(dictionary, dict_index=index)
Example #28
def train(cfg, model):
    criterion = factory.get_criterion(cfg)
    # optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    optim = factory.get_optimizer(cfg, model.parameters())

    best = {
        'loss': float('inf'),
        'score': 0.0,
        'epoch': -1,
    }
    if "resume_from" in cfg.keys() and cfg["resume_from"]:
        detail = utils.load_model(cfg["resume_from"], model, optim=optim)
        best.update({
            'loss': detail['loss'],
            'score': detail['score'],
            'epoch': detail['epoch'],
        })

        # to set lr manually after resumed
        for param_group in optim.param_groups:
            param_group['lr'] = cfg["optimizer"]["param"]["lr"]
        log(f"initial lr {utils.get_lr(optim)}")

    scheduler, is_reduce_lr = factory.get_scheduler(cfg, optim)
    log(f"is_reduce_lr: {is_reduce_lr}")

    loader_train = factory.get_loader_train(cfg)
    loader_valid = factory.get_loader_valid(cfg)

    log('train data: loaded %d records' % len(loader_train.dataset))
    log('valid data: loaded %d records' % len(loader_valid.dataset))

    log('apex %s' % cfg["apex"])
    if cfg["apex"]:
        model, optim = amp.initialize(model, optim, opt_level='O1')

    for epoch in range(best['epoch'] + 1, cfg["epoch"]):

        log(f'\n----- epoch {epoch} -----')

        run_nn(cfg,
               'train',
               model,
               loader_train,
               criterion=criterion,
               optim=optim,
               apex=cfg["apex"])

        with torch.no_grad():
            val = run_nn(cfg,
                         'valid',
                         model,
                         loader_valid,
                         criterion=criterion)

        detail = {
            'score': val['score'],
            'loss': val['loss'],
            'epoch': epoch,
        }
        if val['loss'] <= best['loss']:
            best.update(detail)
            utils.save_model(model,
                             optim,
                             detail,
                             cfg["fold"],
                             output_dir,
                             best=True)

        utils.save_model(model, optim, detail, cfg["fold"], output_dir)

        log('[best] ep:%d loss:%.4f score:%.4f' %
            (best['epoch'], best['loss'], best['score']))

        if is_reduce_lr:
            scheduler.step(val['loss'])  # reducelronplateau
        else:
            scheduler.step()
                    help='name of videoplayer predictions file.')
parser.add_argument('-csv',
                    '--csvfile',
                    default='',
                    help='name of csv predictions file.')
parser.add_argument('-trf',
                    '--trfile',
                    default='',
                    help='name of tracker predictions file.')
args = parser.parse_args()

assert (args.vpfile or args.csvfile or args.trfile)
assert (args.offset >= 1)

states_df = data.get_statewise_data()
model, cp = utils.load_model(args.experimentid, args.checkpoint, v=False)
prediction_date = (states_df.date.max().to_pydatetime() -
                   dt.timedelta(days=args.offset)).strftime("%Y-%m-%d")
print("Predicting for:", prediction_date)

api = predictions.generate(states_df,
                           constants.STT_INFO,
                           model,
                           cp,
                           args.taskidx,
                           args.days,
                           args.offset,
                           plot=False)

if args.vpfile:
    predictions.export_videoplayer(api, prediction_date, args.vpfile)
Example #30
def train_mlt_single(args):
    global logger
    logger.info(args)
    task_lst, vocabs = utils.get_data(args.data_path)
    task_db = task_lst[args.task_id]
    train_data = task_db.train_set
    dev_data = task_db.dev_set
    test_data = task_db.test_set
    task_name = task_db.task_name

    if args.debug:
        train_data = train_data[:200]
        dev_data = dev_data[:200]
        test_data = test_data[:200]
        args.epochs = 3
        args.pruning_iter = 3

    summary_writer = SummaryWriter(
        log_dir=os.path.join(args.tb_path, "global/%s" % task_name)
    )

    logger.info("task name: {}, task id: {}".format(task_db.task_name, task_db.task_id))
    logger.info(
        "train len {}, dev len {}, test len {}".format(
            len(train_data), len(dev_data), len(test_data)
        )
    )

    # init model
    model = get_model(args, task_lst, vocabs)

    logger.info("model: \n{}".format(model))
    if args.init_weights is not None:
        utils.load_model(model, args.init_weights)

    if utils.need_acc(task_name):
        metrics = [AccuracyMetric(target="y"), MetricInForward(val_name="loss")]
        metric_key = "acc"

    else:
        metrics = [
            YangJieSpanMetric(
                tag_vocab=vocabs[task_name],
                pred="pred",
                target="y",
                seq_len="seq_len",
                encoding_type="bioes" if task_name == "ner" else "bio",
            ),
            MetricInForward(val_name="loss"),
        ]
        metric_key = "f"
    logger.info(metrics)

    need_cut_names = list(set([s.strip() for s in args.need_cut.split(",")]))
    prune_names = []
    for name, p in model.named_parameters():
        if not p.requires_grad or "bias" in name:
            continue
        for n in need_cut_names:
            if n in name:
                prune_names.append(name)
                break

    # get Pruning class
    pruner = Pruning(
        model, prune_names, final_rate=args.final_rate, pruning_iter=args.pruning_iter
    )
    if args.init_masks is not None:
        pruner.load(args.init_masks)
        pruner.apply_mask(pruner.remain_mask, pruner._model)
    # save checkpoint
    os.makedirs(args.save_path, exist_ok=True)

    logger.info('Saving init-weights to {}'.format(args.save_path))
    torch.save(
        model.cpu().state_dict(), os.path.join(args.save_path, "init_weights.th")
    )
    torch.save(args, os.path.join(args.save_path, "args.th"))
    # start training and pruning
    summary_writer.add_scalar("remain_rate", 100.0, 0)
    summary_writer.add_scalar("cutoff", 0.0, 0)

    if args.init_weights is not None:
        init_tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = init_tester.test()
        logger.info("No init testing, Result: {}".format(res))
        del res, init_tester

    for prune_step in range(pruner.pruning_iter + 1):
        # reset optimizer every time
        optim_params = [p for p in model.parameters() if p.requires_grad]
        # utils.get_logger(__name__).debug(optim_params)
        utils.get_logger(__name__).debug(len(optim_params))
        optimizer = get_optim(args.optim, optim_params)
        # optimizer = TriOptim(optimizer, args.n_filters, args.warmup, args.decay)
        factor = pruner.cur_rate / 100.0
        factor = 1.0
        # print(factor, pruner.cur_rate)
        for pg in optimizer.param_groups:
            pg["lr"] = factor * pg["lr"]
        utils.get_logger(__name__).info(optimizer)

        trainer = Trainer(
            train_data,
            model,
            loss=LossInForward(),
            optimizer=optimizer,
            metric_key=metric_key,
            metrics=metrics,
            print_every=200,
            batch_size=args.batch_size,
            num_workers=4,
            n_epochs=args.epochs,
            dev_data=dev_data,
            save_path=None,
            sampler=fastNLP.BucketSampler(batch_size=args.batch_size),
            callbacks=[
                pruner,
                # LRStep(lstm.WarmupLinearSchedule(optimizer, args.warmup, int(len(train_data)/args.batch_size*args.epochs)))
                GradientClipCallback(clip_type="norm", clip_value=5),
                LRScheduler(
                    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep))
                ),
                LogCallback(path=os.path.join(args.tb_path, "No", str(prune_step))),
            ],
            use_tqdm=False,
            device="cuda",
            check_code_level=-1,
        )
        res = trainer.train()
        logger.info("No #{} training, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        summary_writer.add_scalar("prunning_dev_acc", val, prune_step)
        tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = tester.test()
        logger.info("No #{} testing, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        summary_writer.add_scalar("pruning_test_acc", val, prune_step)

        # prune and save
        torch.save(
            model.state_dict(),
            os.path.join(
                args.save_path,
                "best_{}_{}.th".format(pruner.prune_times, pruner.cur_rate),
            ),
        )
        pruner.pruning_model()
        summary_writer.add_scalar("remain_rate", pruner.cur_rate, prune_step + 1)
        summary_writer.add_scalar("cutoff", pruner.last_cutoff, prune_step + 1)

        pruner.save(
            os.path.join(
                args.save_path, "{}_{}.th".format(pruner.prune_times, pruner.cur_rate)
            )
        )
Example #31
    else:
        args.eval_batch_size = 32

    args.raw_data_dir = os.path.join(HOME_DATA_FOLDER, 'data_raw', task)
    run_folder = os.path.join(KD_DIR, task, sub_dir, run_folder)
    encoder_file = glob.glob(run_folder + '/*e.%d.encoder.pkl' % epoch)
    cls_file = glob.glob(run_folder + '/*e.%d.cls.pkl' % epoch)
    assert len(encoder_file) == 1 and len(
        cls_file) == 1, f'encoder/cls file error: {encoder_file}, {cls_file}'
    encoder_file, cls_file = encoder_file[0], cls_file[0]

    encoder_bert, classifier = init_model(task, output_all_layers, n_layer,
                                          config)
    encoder_bert = load_model(encoder_bert,
                              encoder_file,
                              args,
                              'exact',
                              verbose=True)
    classifier = load_model(classifier, cls_file, args, 'exact', verbose=True)

    all_res = {'train': None, 'dev': None, 'test': None}
    if 'dev' in interested_set or 'valid' in interested_set:
        dev_examples, dev_dataloader, dev_label_ids = get_task_dataloader(
            task.lower(), 'dev', tokenizer, args, SequentialSampler,
            args.eval_batch_size)
        dev_res = eval_model_dataloader(encoder_bert,
                                        classifier,
                                        dev_dataloader,
                                        args.device,
                                        detailed=True,
                                        verbose=False)