Example #1
def train(restore, is_master=True):
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    with strategy.scope():
        encoders = get_encoders()
        dataset = get_dataset(encoders)
        train_data = dataset.batch(config.BATCH_SIZE)

        _, generator = get_generator(encoders)

        checkpoint_path = path.join(config.CHECKPOINT_DIR, "keras",
                                    "generator.ckpt")
        if restore:
            generator.load_weights(checkpoint_path)

    callbacks = []
    if is_master:
        generator.summary()
        stats_filename = datetime.now().strftime("%Y%m%d_%H%M") + ".csv"
        callbacks = [
            K.callbacks.CSVLogger(
                path.join(config.LOG_DIR, "stats", stats_filename)),
            # K.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True),
            EvaluationLogger(generator, dataset, encoders)
        ]
    initial_epoch = (generator.optimizer.iterations.numpy() //
                     config.STEPS_PER_EPOCH)
    generator.fit(train_data,
                  epochs=config.NUM_EPOCHS,
                  initial_epoch=initial_epoch,
                  steps_per_epoch=config.STEPS_PER_EPOCH,
                  callbacks=callbacks)
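
A minimal launch sketch, not part of the original example: tf.distribute.experimental.MultiWorkerMirroredStrategy reads the TF_CONFIG environment variable, so each worker would set it before calling train(). The two-worker cluster and ports below are assumptions.

import json
import os

# Hypothetical two-worker cluster; TF_CONFIG must be set before the strategy
# is constructed inside train().
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {"worker": ["localhost:12345", "localhost:12346"]},
    "task": {"type": "worker", "index": 0},
})

# Worker 0 acts as the master and therefore attaches the logging callbacks.
train(restore=False, is_master=True)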
Example #2
def _get_dataset(name, batch_size, buffer_size, partial):
    dataset_path = _util.get_rel_datasets_path(name)
    _util.ensure_dir(dataset_path)

    return get_dataset(dataset_path,
                       batch_size=batch_size,
                       buffer_size=buffer_size,
                       partial=partial).map(_only_cropped_scan)
Example #3
def main():
    args = get_arguments()
    logger.info(args)
    target_size = args.get('size')
    padding_ratio = args.get('padding')

    dataset = get_dataset(args.get('dataset', None))
    print(dataset.root_path)

    img_paths = dataset.df_meta[dataset.img_colname].to_list()
    if dataset.relative_path:
        img_paths = [os.path.join(dataset.root_path, p) for p in img_paths]

    new_dataset_name = f'{dataset.name()}_{target_size}_pad={padding_ratio}'
    root_path = os.path.join(environments.DATASET_DIR, new_dataset_name)
    os.makedirs(root_path, exist_ok=True)

    if args.get('gpu', False):
        logger.info('use gpu')
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()
    detector = MtcnnDetector(minsize=100, num_worker=1, ctx=ctx)

    results = []
    for p in tqdm(img_paths, total=len(img_paths)):
        img = cv2.imread(p)
        dirname = p.split('/')[-2]
        filename = p.split('/')[-1].split('.')[0]
        dir_path = os.path.join(root_path, dirname)
        os.makedirs(dir_path, exist_ok=True)

        try:
            clipped, prob = clip_most_humanise_image(
                detector,
                img,
                target_size=target_size,
                padding_ratio=padding_ratio)
            new_filename = f'{filename}_{prob:.3f}.jpg'
        except NotDetectionError:
            # If no face was detected, crop from the image center instead
            img = Image.fromarray(img)
            prob = -1
            clipped = clop_center(img, target_shape=(target_size, target_size))
            clipped = np.array(clipped)
            new_filename = f'{filename}_not-detected.jpg'

        new_path = os.path.join(dir_path, new_filename)
        cv2.imwrite(new_path, clipped)
        results.append([os.path.relpath(new_path, root_path), prob])

    df_meta = pd.DataFrame(results, columns=['img_path', 'prob'])
    df_meta['origin_path'] = dataset.df_meta[dataset.img_colname]
    df_meta[dataset.label_colname] = dataset.df_meta[dataset.label_colname]
    df_meta.to_csv(os.path.join(root_path, 'meta.csv'), index=False)
Example #4
def vae_train(config):
    config['outdir'].mkdir(parents=True, exist_ok=True)

    # get device
    if config['use_gpu']:
        device = torch.device('cuda')
        # moving a tensor to GPU
        # useful at BUT cluster to prevent someone from getting the same GPU
        fake = torch.Tensor([1]).to(device)
    else:
        device = torch.device('cpu')

    dataset_class = get_dataset(config['dataset_type'])

    # compute or load mean and std of dataset
    trans = lambda x: logspec(x, **config['spectrum_conf'])
    dataset = dataset_class(config['dataset'], transform=trans)
    dataloader_meanstd = DataLoader(dataset)
    meanstd_norm = get_meanstd_norm(config['meanstd_norm_file'],
                                    dataloader_meanstd)

    # load the dataset
    trans = lambda x: meanstd_norm(logspec(x, **config['spectrum_conf']))
    dataset = dataset_class(config['dataset'], transform=trans)
    dataloader_train = DataLoader(dataset,
                                  batch_size=config['batch_size'],
                                  collate_fn=PadCollate(),
                                  shuffle=True)

    # create the model
    model = SeqVAESpeaker(**config['vae_conf']).to(device)

    # store model config
    with open(config['outdir'] / 'vae_config', 'w') as f:
        json.dump(config['vae_conf'], f, indent=2)

    # load loss function
    if config['vae_objective'] == 'elbo':
        loss = ELBOLoss(model).to(device)
    elif config['vae_objective'] == 'elbo_speakerid':
        loss = ELBOSpeakerLoss(model, config['speaker_loss_weight']).to(device)
    else:
        raise KeyError(f'Unknown objective {config["vae_objective"]}')

    # run training
    trainer = Trainer(model,
                      loss,
                      dataloader_train,
                      config['outdir'],
                      device=device,
                      **config['optimizer_conf'])
    trainer.run()
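
For reference, a hypothetical config that satisfies the keys vae_train reads above; every value is illustrative, not taken from the original project.

from pathlib import Path

config = {
    'outdir': Path('exp/vae_run1'),         # created by vae_train
    'use_gpu': True,
    'dataset_type': 'timit',                # resolved by get_dataset
    'dataset': 'data/train',
    'spectrum_conf': {},                    # kwargs forwarded to logspec
    'meanstd_norm_file': 'exp/meanstd.pkl',
    'batch_size': 32,
    'vae_conf': {},                         # kwargs forwarded to SeqVAESpeaker
    'vae_objective': 'elbo',                # or 'elbo_speakerid'
    'speaker_loss_weight': 1.0,             # only used with 'elbo_speakerid'
    'optimizer_conf': {},                   # kwargs forwarded to Trainer
}
vae_train(config)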
Example #5
def train(restore):
    encoders = get_encoders()
    dataset = get_dataset(encoders, difficulty=10)
    text_rnn, generator, discriminator, gan = get_models(encoders)

    checkpoint_path = path.join(config.CHECKPOINT_DIR, "keras",
                                "text_rnn.ckpt")
    if restore:
        text_rnn.load_weights(checkpoint_path)

    logger = EvaluationLogger(generator, dataset, encoders)
    accumulator = MetricsAccumulator(path.join(config.LOG_DIR, "stats"))

    _train_on_batch_f = _get_train_on_batch_f(generator, discriminator, gan,
                                              accumulator)

    difficulty = 10
    dataset = get_dataset(encoders, difficulty)
    train_data = dataset.batch(config.BATCH_SIZE).take(config.STEPS_PER_EPOCH)
    for epoch in range(config.NUM_EPOCHS):
        # if epoch >= 500 and epoch % 10==0:
        #     difficulty += 1
        #     dataset = get_dataset(encoders, difficulty)
        #     train_data = dataset.batch(config.BATCH_SIZE).take(config.STEPS_PER_EPOCH)
        start_time = time.time()
        discr_only_steps = 0  # if epoch < 500 else 1
        for b, (text_inputs_dict, images) in enumerate(train_data):
            print(f"{b} completed", end="\r")
            train_part = TRAIN_D if epoch < 5 else \
                        TRAIN_GD if b%(discr_only_steps+1) == 0 else TRAIN_D
            _train_on_batch_f(text_inputs_dict, images, train_part)
        accumulator.accumulate(epoch)
        logger.on_epoch_end(epoch)
        logging.info(
            "Done with epoch %s; took %ss (difficulty=%s; discr_only_steps=%s)",
            epoch, round(time.time() - start_time, 2), difficulty,
            discr_only_steps)
Example #6
def standardize(dataset: str):
    """

    :param dataset:
    :return:
    """
    assert isinstance(dataset, str) and len(dataset)

    tf.enable_eager_execution()

    train_path = _util.get_rel_datasets_path(dataset, "train")
    _util.ensure_dir(train_path)

    dataset_path = _util.get_rel_datasets_path(dataset)

    standardized_name = _get_standardized_name(dataset)
    standardized_path = _util.get_rel_datasets_path(standardized_name)
    # _util.ensure_path_free(standardized_path, empty_ok=True)
    # _util.mkdir(standardized_path)

    train_data = _dataset.get_dataset(train_path, partial=True)
    train_iter = train_data.repeat().make_one_shot_iterator()
    train_records = _dataset.get_records(train_path, partial=True)

    # Compute sample mean over train
    total = train_iter.next()[0][0]
    for _ in tqdm(train_records[1:]):
        sample = train_iter.next()

        total += sample[0][0]
    mean = total / len(train_records)

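    # Second pass: accumulate squared deviations from the mean over train.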
    total = tf.square(train_iter.next()[0][0] - mean)
    for _ in tqdm(train_records[1:]):
        sample = train_iter.next()

        scan = sample[0][0]
        total += tf.square(scan - mean)
    std = tf.sqrt(tf.reduce_mean(total))

    _standardize_dataset(train_path, dataset, mean, std)
    _standardize_dataset(_util.get_rel_datasets_path(dataset, "dev"), dataset,
                         mean, std)
    _standardize_dataset(_util.get_rel_datasets_path(dataset, "test"), dataset,
                         mean, std)

    _dataset.save_shape(standardized_path, _dataset.load_shape(dataset_path))
    _dataset.save_mean(standardized_path, mean.numpy())
    _dataset.save_std(standardized_path, std.numpy())
Example #7
def test_simple_cnn_model():
    args = update_args(cfg_file='simple_cnn')
    device = 'cpu'

    train_data_dict = get_dataset(args).train_data_dict

    train_dataloader = get_dataloader(
        batch_size=args.TRAIN.BATCH_SIZE,
        dataset=train_data_dict['dataset'],
        num_workers=args.DATA.NUM_WORKERS,
        sampler=train_data_dict['train_sampler'],
    )

    validation_dataloader = get_dataloader(
        batch_size=args.TRAIN.BATCH_SIZE,
        dataset=train_data_dict['dataset'],
        num_workers=args.DATA.NUM_WORKERS,
        sampler=train_data_dict['validation_sampler'],
    )

    model = get_model(
        args=args,
        device=device,
        hparams={
            'learning rate': args.TRAIN.LR,
            'batch size': args.TRAIN.BATCH_SIZE,
        },
    )

    for data, labels in train_dataloader:
        data, labels = data.to(device), labels.to(device)
        outputs = model(data)
        assert (data.shape[0] == labels.shape[0] == outputs.shape[0] ==
                args.TRAIN.BATCH_SIZE)
        assert data.shape[1] == 3
        assert outputs.shape[1] == len(args.DATA.CLASSES)

        break

    for data, labels in validation_dataloader:
        data, labels = data.to(device), labels.to(device)
        outputs = model(data)
        assert (data.shape[0] == labels.shape[0] == outputs.shape[0] ==
                args.TRAIN.BATCH_SIZE)
        assert data.shape[1] == 3
        assert outputs.shape[1] == len(args.DATA.CLASSES)

        break
Example #8
def visualize(dataset: str):
    """

    :param dataset:
    :return:
    """
    assert isinstance(dataset, str) and len(dataset)
    tf.enable_eager_execution()

    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)

    data = _dataset.get_dataset(dataset_path, 1, 1, partial=True)

    scan = data.make_one_shot_iterator().next()[0][0].numpy()
    show_scan(scan.squeeze(), "")
Example #9
def downsample(dataset: str, shape: List[int], partial=False):
    """

    :param dataset:
    :param shape:
    :return:
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(shape, list) and all(isinstance(s, int)
                                           for s in shape) and len(shape) == 3
    assert isinstance(partial, bool)
    tf.enable_eager_execution()

    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)

    data = _dataset.get_dataset(dataset_path, 1, 8, partial=partial)

    resized_dataset = "{}_resized".format(dataset)
    resized_path = _util.get_rel_datasets_path(resized_dataset)
    _util.ensure_path_free(resized_path)
    _util.mkdir(resized_path)

    data_iter = data.make_one_shot_iterator()
    records = _dataset.get_records(dataset_path, partial)

    for record in tqdm(records):
        record = record.replace(dataset, resized_dataset)
        sample = data_iter.next()

        scan = sample[0][0].numpy().squeeze()
        # show_scan(scan, "Original")

        crop = crop_image(scan, 1e-5)
        # show_scan(crop, "Crop")

        factors = [s / d for d, s in zip(crop.shape, shape)]
        resized = ndimage.zoom(crop, zoom=factors, order=4)
        # show_scan(resized, "Resized")

        _dataset.write_record(record, resized, sample[0][1].numpy().squeeze(),
                              sample[1].numpy())

    _dataset.save_shape(resized_path, shape)
Example #10
def get_dataloader(cfg: object, mode: str) -> tuple:
    """Get dataloader function

    This is function to get dataloaders.
    Get dataset, then make dataloaders.

    Args:
        cfg: Config.
        mode: Mode.
            trainval: For training and validation.
            test: For test.

    Returns:
        Tuple of dataloaders.

    """

    log.info(f"Loading {cfg.data.dataset.name} dataset...")

    dataset = get_dataset(cfg, mode)
    sampler = get_sampler(cfg, mode, dataset)

    if mode == "trainval":
        train_dataloader = DataLoader(cfg,
                                      dataset=dataset.train,
                                      sampler=sampler.train)
        val_dataloader = DataLoader(cfg,
                                    dataset=dataset.val,
                                    sampler=sampler.val)
        dataloaders = (train_dataloader, val_dataloader)

    elif mode == "test":
        test_dataloader = DataLoader(cfg,
                                     dataset=dataset.test,
                                     sampler=sampler.test)
        dataloaders = (test_dataloader,)

    log.info(f"Successfully loaded {cfg.data.dataset.name} dataset.")

    return dataloaders
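
A hypothetical call site for the helper above; cfg stands for whatever config object the project passes around.

# "trainval" returns (train_dataloader, val_dataloader);
# "test" returns a single-element tuple.
train_loader, val_loader = get_dataloader(cfg, mode="trainval")
(test_loader,) = get_dataloader(cfg, mode="test")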
Example #11
def _standardize_dataset(dataset_path, dataset, mean, std):
    data = _dataset.get_dataset(dataset_path,
                                partial=True).make_one_shot_iterator()
    records = _dataset.get_records(dataset_path, partial=True)

    standardized_name = _get_standardized_name(dataset)
    standardized_path = dataset_path.replace(dataset, standardized_name)
    _util.ensure_path_free(standardized_path, empty_ok=True)
    _util.mkdir(standardized_path)
    for record in tqdm(records):
        record = record.replace(dataset, standardized_name)
        sample = data.next()

        scan = sample[0][0]
        # show_scan(scan.numpy().squeeze(), "Original")

        standardized = (scan - mean) / std
        # show_scan(standardized.numpy().squeeze(), "Standardized")

        _dataset.write_record(record,
                              standardized.numpy().squeeze(),
                              sample[0][1].numpy().squeeze(),
                              sample[1].numpy())
Example #12
def train(restore):
    encoders = get_encoders()
    dataset = get_dataset(encoders)

    text_rnn, generator = get_generator(encoders)

    checkpoint_path = path.join(config.CHECKPOINT_DIR, "keras",
                                "generator.ckpt")
    if restore:
        generator.load_weights(checkpoint_path)

    stats_filename = datetime.now().strftime('%Y%m%d_%H%M') + ".csv"
    callbacks = [
        # K.callbacks.TensorBoard(path.join(config.LOG_DIR, "tf_boards")),
        K.callbacks.CSVLogger(
            path.join(config.LOG_DIR, "stats", stats_filename)),
        K.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                    save_weights_only=True),
        EvaluationLogger(generator, dataset, encoders)
    ]

    # https://github.com/keras-team/keras/issues/1872#issuecomment-572606922
    initial_epoch = (generator.optimizer.iterations.numpy() //
                     config.STEPS_PER_EPOCH)
    train_data = dataset.batch(config.BATCH_SIZE).take(config.STEPS_PER_EPOCH)
    # val_data = dataset.batch(config.BATCH_SIZE).take(8)
    generator.fit(
        train_data,
        epochs=config.NUM_EPOCHS,
        initial_epoch=initial_epoch,
        # validation_data=val_data,
        callbacks=callbacks)

    checkpoint_path = path.join(config.CHECKPOINT_DIR, "keras",
                                "text_rnn.ckpt")
    text_rnn.save_weights(checkpoint_path)
Example #13
def noise_models_train(config):
    config['outdir'].mkdir(parents=True, exist_ok=True)
    dataset_class = get_dataset(config['dataset_type'])

    # compute or load mean and std of dataset
    trans = lambda x: logspec(x, **config['spectrum_conf'])
    dataset = dataset_class(config['dataset'], transform=trans)
    dataloader_meanstd = DataLoader(dataset)
    meanstd_norm = get_meanstd_norm(config['meanstd_norm_file'],
                                    dataloader_meanstd)

    # load the dataset
    dataset = dataset_class(config['dataset'])

    for low, high in config['snrs']:
        for use_norm in [True, False]:
            logging.info(
                f'Noise model for SNR {low}-{high} dB, use norm: {use_norm}')
            name = f'snr_{low}_{high}'
            name = f'{name}_wonorm' if not use_norm else name
            get_noise_stats(config['outdir'] / name, (low, high),
                            dataset,
                            config['spectrum_conf'],
                            meanstd_norm=meanstd_norm if use_norm else None)
Example #14
class LinearMetrics(nn.Module):
    def __init__(self, input_dim, out_dim):
        super(LinearMetrics, self).__init__()
        self.linear = nn.Linear(input_dim, out_dim)

    def forward(self, x, label):
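        # `label` is accepted for interface compatibility but not used by this head.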
        return self.linear(x)


if __name__ == '__main__':

    opt = Config()
    device = torch.device("cuda")

    train_dataset = get_dataset(Config.dataset,
                                phase='train',
                                input_shape=environments.INPUT_SHAPE)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=opt.train_batch_size,
                                   shuffle=True,
                                   num_workers=opt.num_workers)

    logger.info('{} train iters per epoch:'.format(len(train_loader)))

    if opt.loss == 'focal_loss':
        criterion = FocalLoss(gamma=2)
    elif opt.loss == 'logloss':
        criterion = torch.nn.CrossEntropyLoss()
    else:
        raise ValueError(f'Unsupported loss: {opt.loss}')
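
A quick shape check of the LinearMetrics head defined above; the dimensions and batch size are arbitrary.

import torch

head = LinearMetrics(input_dim=512, out_dim=10)
features = torch.randn(4, 512)
labels = torch.zeros(4, dtype=torch.long)
logits = head(features, labels)  # label is ignored by this head
assert logits.shape == (4, 10)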
Example #15
File: train.py  Project: ryuji0123/tmp
def main(
    args,
    args_file_path: str,
    tmp_results_dir: str,
    train_log_file_path: str,
) -> None:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    train_data_dict = get_dataset(args).train_data_dict

    train_dataloader = get_dataloader(
        batch_size=args.TRAIN.BATCH_SIZE,
        dataset=train_data_dict['dataset'],
        num_workers=args.DATA.NUM_WORKERS,
        sampler=train_data_dict['train_sampler'],
    )

    validation_dataloader = get_dataloader(
        batch_size=args.TRAIN.BATCH_SIZE,
        dataset=train_data_dict['dataset'],
        num_workers=args.DATA.NUM_WORKERS,
        sampler=train_data_dict['validation_sampler'],
    )

    model = get_model(
        args=args,
        device=device,
        hparams={
            'learning rate': args.TRAIN.LR,
            'batch size': args.TRAIN.BATCH_SIZE,
        },
    ).to(device)

    mlflow_logger = MLFlowLogger(experiment_name=args.MLFLOW.EXPERIMENT_NAME)

    checkpoint_callback = ModelCheckpoint(monitor='validation_accuracy')

    trainer = pl.Trainer(
        callbacks=[checkpoint_callback],
        distributed_backend=args.TRAIN.DISTRIBUTED_BACKEND,
        gpus=args.TRAIN.GPUS,
        logger=mlflow_logger,
        max_epochs=args.TRAIN.MAX_EPOCHS,
        replace_sampler_ddp=False,
    )

    try:
        exist_error = False
        trainer.fit(model, train_dataloader, validation_dataloader)
    except Exception:
        run_id = mlflow_logger.run_id
        if run_id is not None:
            error_file_path = join(tmp_results_dir, 'error_log.txt')
            with open(error_file_path, 'w') as f:
                traceback.print_exc(file=f)
            exist_error = True
            print()
            print('Failed to train. See error_log.txt on mlflow.')
            print(f'Experiment name: {args.MLFLOW.EXPERIMENT_NAME}')
            print(f'Run id: {run_id}')
            sys.exit(1)
    finally:
        run_id = mlflow_logger.run_id
        if run_id is not None:
            with open(args_file_path, 'w') as f:
                with redirect_stdout(f):
                    print(args.dump())

            mlflow_client = MlflowClient()
            mlflow_client.log_artifact(run_id, args_file_path)
            mlflow_client.log_artifact(run_id, train_log_file_path)
            if exist_error:
                mlflow_client.log_artifact(run_id, error_file_path)
            rmtree(tmp_results_dir, ignore_errors=True)
Example #16
def objective(trial, args, tmp_results_dir: str) -> float:
    timestamp = datetime.today().strftime('%Y-%m-%d-%H:%M:%S')
    cur_tmp_results_dir = join(tmp_results_dir, timestamp)
    makedirs(cur_tmp_results_dir, exist_ok=True)

    args_file_path = join(cur_tmp_results_dir, 'args.yaml')
    train_log_file_path = join(cur_tmp_results_dir, 'log.txt')

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    train_data_dict = get_dataset(args).train_data_dict

    train_dataloader = get_dataloader(
        batch_size=args.TRAIN.BATCH_SIZE,
        dataset=train_data_dict['dataset'],
        num_workers=args.DATA.NUM_WORKERS,
        sampler=train_data_dict['train_sampler'],
    )

    validation_dataloader = get_dataloader(
        batch_size=args.TRAIN.BATCH_SIZE,
        dataset=train_data_dict['dataset'],
        num_workers=args.DATA.NUM_WORKERS,
        sampler=train_data_dict['validation_sampler'],
    )

    model = get_model(
        args=args,
        device=device,
        trial=trial,
        # If you want to pick hparams with the trial, you don't need to write any values here.
        # This code picks the learning rate via the trial and records it in the model's configure_optimizers method.
        hparams={
            'batch size': args.TRAIN.BATCH_SIZE,
        },
    ).to(device)

    mlflow_logger = MLFlowLogger(experiment_name=args.MLFLOW.EXPERIMENT_NAME)

    checkpoint_callback = ModelCheckpoint(monitor='validation_accuracy')

    trainer = pl.Trainer(
        callbacks=[checkpoint_callback],
        distributed_backend=args.TRAIN.DISTRIBUTED_BACKEND,
        gpus=args.TRAIN.GPUS,
        logger=mlflow_logger,
        max_epochs=args.TRAIN.MAX_EPOCHS,
        replace_sampler_ddp=False,
    )

    try:
        exist_error = False
        print(f'To see training logs, you can check {train_log_file_path}')
        with open(train_log_file_path, 'w') as f:
            with redirect_stdout(f):
                trainer.fit(model, train_dataloader, validation_dataloader)
    except Exception:
        run_id = mlflow_logger.run_id
        if run_id is not None:
            error_file_path = join(tmp_results_dir, 'error_log.txt')
            with open(error_file_path, 'w') as f:
                traceback.print_exc(file=f)
            exist_error = True
            print()
            print('Failed to train. See error_log.txt on mlflow.')
            print(f'Experiment name: {args.MLFLOW.EXPERIMENT_NAME}')
            print(f'Run id: {run_id}')
            sys.exit(1)
    finally:
        run_id = mlflow_logger.run_id
        if run_id is not None:
            with open(args_file_path, 'w') as f:
                with redirect_stdout(f):
                    print(args.dump())
            mlflow_client = MlflowClient()
            mlflow_client.log_artifact(run_id, args_file_path)
            mlflow_client.log_artifact(run_id, train_log_file_path)
            if exist_error:
                mlflow_client.log_artifact(run_id, error_file_path)
            rmtree(cur_tmp_results_dir, ignore_errors=True)

    return checkpoint_callback.best_model_score
Example #17
def input_fn():
    dataset = get_dataset(encoders)
    return dataset.batch(config.BATCH_SIZE)
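
A sketch of how a closure like input_fn is typically consumed, assuming a TF 1.x tf.estimator.Estimator named estimator exists; this is not part of the original snippet.

# Hypothetical consumer of input_fn:
estimator.train(input_fn=input_fn, steps=config.STEPS_PER_EPOCH)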
Example #18
def main(base_path, set_name=None, writer=None):
    """
        Main eval loop: Iterates over all evaluation samples and saves the corresponding predictions.
    """
    # default value
    if set_name is None:
        set_name = ['evaluation']

    if 'training' in set_name:  #set_name == 'training':
        # initialize train datasets
        train_loaders = []
        if args.controlled_exp:
            # Use subset of datasets so that final dataset size is constant
            limit_size = int(args.controlled_size / len(args.train_datasets))
        else:
            limit_size = None
        for dat_name in args.train_datasets:  # iterations per epoch = min(dataset_len) / batch_size; one batch is drawn from each dataset per step
            if dat_name == 'FreiHand':
                if len(args.train_queries_frei) > 0:
                    train_queries = args.train_queries_frei
                else:
                    train_queries = args.train_queries
                base_path = args.freihand_base_path
            elif dat_name == 'RHD':
                if len(args.train_queries_rhd) > 0:
                    train_queries = args.train_queries_rhd
                else:
                    train_queries = args.train_queries
                base_path = args.rhd_base_path
            elif (dat_name == 'Obman') or (dat_name == 'Obman_hand'):
                train_queries = args.train_queries
            elif dat_name == 'HO3D':
                if len(args.train_queries_ho3d) > 0:
                    train_queries = args.train_queries_ho3d
                else:
                    train_queries = args.train_queries
                base_path = args.ho3d_base_path

            train_dat = get_dataset(
                dat_name,
                'training',  #set_name,
                base_path,
                queries=train_queries,
                train=True,
                limit_size=limit_size,
                #transform=transforms.Compose([transforms.Rescale(256),transforms.ToTensor()]))
            )
            print("Training dataset size: {}".format(len(train_dat)))
            # Initialize train dataloader

            train_loader0 = torch.utils.data.DataLoader(
                train_dat,
                batch_size=args.train_batch,
                shuffle=True,  #check
                num_workers=args.num_workers,
                pin_memory=True,
                drop_last=True,
            )
            train_loaders.append(train_loader0)
        train_loader = ConcatDataloader(train_loaders)
    #if 'evaluation' in set_name:
    val_loaders = []
    for dat_name_val in args.val_datasets:
        if dat_name_val == 'FreiHand':
            val_queries = args.val_queries
            base_path = args.freihand_base_path
        elif dat_name_val == 'RHD':
            val_queries = args.val_queries
            base_path = args.rhd_base_path
        elif dat_name_val == 'HO3D':
            val_queries = args.val_queries
            base_path = args.ho3d_base_path
        val_dat = get_dataset(
            dat_name_val,
            'evaluation',
            base_path,
            queries=val_queries,
            train=False,
            #transform=transforms.Compose([transforms.Rescale(256),transforms.ToTensor()]))
        )
        print("Validation dataset size: {}".format(len(val_dat)))
        val_loader = torch.utils.data.DataLoader(
            val_dat,
            batch_size=args.val_batch,
            shuffle=False,
            num_workers=args.num_workers,
            pin_memory=True,
            drop_last=False,
        )
        val_loaders.append(val_loader)
    val_loader = ConcatDataloader(val_loaders)

    #current_epoch = 0
    if len(args.train_datasets) == 1:
        dat_name = args.train_datasets[0]  #dat_name
    else:
        dat_name = args.train_datasets

    #losses = AverageMeter()
    if 'training' in set_name:  #set_name == 'training':
        if args.optimizer == "Adam":
            optimizer = optim.Adam(model.parameters(),
                                   lr=args.init_lr,
                                   betas=(0.9, 0.999),
                                   weight_decay=0)
        if args.optimizer == "AdamW":
            optimizer = optim.Adam(model.parameters(),
                                   lr=args.init_lr,
                                   betas=(0.9, 0.999),
                                   eps=1e-08,
                                   weight_decay=0.01,
                                   amsgrad=False)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=args.lr_steps,
                                                   gamma=args.lr_gamma)

        for epoch in range(1, args.total_epochs + 1):
            mode_train = True
            requires = args.train_requires
            args.train_batch = args.train_batch
            TrainVal(mode_train, dat_name, epoch + current_epoch, train_loader,
                     model, optimizer, requires, args, writer)
            torch.cuda.empty_cache()

            # save parameters
            if (epoch + current_epoch) % args.save_interval == 0:
                # test
                mode_train = False
                requires = args.test_requires
                args.train_batch = args.val_batch
                print('For test part:')
                TrainVal(mode_train, dat_name_val, epoch + current_epoch,
                         val_loader, model, optimizer, requires, args, writer)
                torch.cuda.empty_cache()
                save_model(model, optimizer, epoch, current_epoch, args)
            scheduler.step()
    elif 'evaluation' in set_name:  #set_name == 'evaluation':
        mode_train = False
        requires = args.test_requires
        optimizer = optim.Adam(model.parameters(),
                               lr=args.init_lr,
                               betas=(0.9, 0.999),
                               weight_decay=0)  #
        #epoch = 0
        #current_epoch = 0
        #save_model(model,optimizer,epoch,current_epoch, args)
        #import pdb; pdb.set_trace()
        TrainVal(mode_train, dat_name_val, current_epoch, val_loader, model,
                 None, requires, args, writer)
        print("Finish write prediction. Good luck!")

    print("Done!")
Example #19
def train(dataset: str,
          epochs: int,
          batch_size: int,
          buffer_size: int,
          lr: float,
          l2_reg=0.,
          tv_reg=0.,
          ssim_loss=0.,
          sobel_loss=0.):
    """
    Trains an Autoencoder using the specified parameters.

    :param dataset: Existing dataset over which to train. Must contain train, dev, {mean,std}.pickle, shape.json
    :param epochs: Number of iterations over training data before termination.
    :param batch_size: Number of training samples per batch.
    :param buffer_size: Number of batches to prefetch.
    :param lr: Adam optimization initial learning rate.
    :param l2_reg: L2 regularization coefficient for kernel weights.
    :param tv_reg: Total Variation regularization coefficient for data.
    :param ssim_loss: SSIM regularization coefficient for data.
    :param sobel_loss: L2 regularization coefficient for data Sobel difference.
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(epochs, int) and epochs > 0
    assert isinstance(batch_size, int) and batch_size > 0
    assert isinstance(buffer_size, int) and buffer_size > 0
    assert isinstance(lr, float) and lr > 0
    assert isinstance(l2_reg, float) and l2_reg >= 0
    assert isinstance(tv_reg, float) and tv_reg >= 0
    assert isinstance(ssim_loss, float) and ssim_loss >= 0
    assert isinstance(sobel_loss, float) and sobel_loss >= 0

    # Load and ensure required paths.
    weights_path = _util.get_weights_path_by_param(model="autoencoder",
                                                   dataset=dataset,
                                                   epochs=epochs,
                                                   batch_size=batch_size,
                                                   lr=lr,
                                                   l2_reg=l2_reg,
                                                   tv_reg=tv_reg,
                                                   ssim_loss=ssim_loss,
                                                   sobel_loss=sobel_loss)
    log_path = os.path.join(weights_path, "logs")
    _util.ensure_path_free(log_path, empty_ok=True)
    _util.mkdir(log_path)
    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)

    # Load model and input shape.
    shape = _dataset.load_shape(dataset_path)
    mean = _dataset.load_mean(dataset_path)
    std = _dataset.load_std(dataset_path)
    model = Autoencoder(l2_reg)

    # Create input/output placeholders.
    inp = tf.image.per_image_standardization(
        tf.placeholder(tf.float32, shape=[None, *shape]))
    out = model.call(inp)

    # Initialize loss functions.
    total_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss = \
        _get_losses(inp, out, batch_size, model.losses, l2_reg, tv_reg, ssim_loss, sobel_loss)
    # Configure training operation.
    train_op = _get_train_op(total_loss, lr)

    # Load datasets
    train_dataset = (_dataset.get_dataset(
        os.path.join(dataset_path, "train"), partial=True).map(
            _only_cropped_scan).batch(batch_size).prefetch(buffer_size))
    dev_dataset = (_dataset.get_dataset(
        os.path.join(dataset_path, "dev"), partial=True).map(
            _only_cropped_scan).batch(batch_size).prefetch(buffer_size))

    # Setup logging and weight saving.
    _tboard.configure(log_path, flush_secs=2)
    saver = tf.train.Saver()

    # Initialize training loop variables.
    best_dev_loss, dev_loss = np.inf, np.inf
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        _logger.info("Counting datasets...")
        train_batches = dataset_iter_len(
            sess,
            train_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tTrain samples: {}".format(train_batches))
        dev_batches = dataset_iter_len(
            sess,
            dev_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tDev samples: {}".format(dev_batches))

        train_loss = total_loss / train_batches
        dev_loss = total_loss / dev_batches

        train_dataset = (_dataset.get_dataset(
            os.path.join(dataset_path, "train"), partial=True).map(
                _only_cropped_scan).batch(batch_size).prefetch(buffer_size))

        for epoch in tqdm(range(epochs)):
            train_iter = train_dataset.make_one_shot_iterator().get_next()

            losses = defaultdict(float)
            for _ in range(train_batches):
                sample = sess.run(train_iter)
                _, _train_loss, _l2_loss, _l2_reg, _tv_reg, _ssim_loss, _sobel_loss = \
                    sess.run(
                        [train_op, train_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss],
                        feed_dict={inp: sample})
                losses["train/loss/total"] += _train_loss
                losses["train/loss/l2_loss"] += _l2_loss
                losses["train/reg/l2"] += _l2_reg
                losses["train/reg/tv"] += _tv_reg
                losses["train/loss/ssim"] += _ssim_loss
                losses["train/loss/sobel"] += _sobel_loss

            # Increment before doing anything else to avoid zero-indexed epochs.
            epoch += 1

            # Log training losses to tensorboard.
            for name, val in losses.items():
                _tboard.log_value(name, val, step=epoch)
            _logger.info("Epoch {}: train loss {}".format(
                epoch, losses["train/loss/total"]))

            # Compute dev metrics every 2 epochs.
            if epoch < 2 or epoch % 2 == 0:
                losses.clear()

                # Compute and log dev loss
                _dev_loss, _l2_loss, _l2_reg, _tv_reg, _ssim_loss, _sobel_loss = \
                    _get_dev_loss(sess, inp, dev_dataset, dev_batches, dev_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss)

                # Log dev losses to tensorboard.
                _logger.info("Epoch {}: dev loss {}".format(epoch, _dev_loss))

                _tboard.log_value("dev/loss/total", _dev_loss, step=epoch)
                _tboard.log_value("dev/loss/l2_loss", _l2_loss, step=epoch)
                _tboard.log_value("dev/reg/l2", _l2_reg, step=epoch)
                _tboard.log_value("dev/reg/tv", _tv_reg, step=epoch)
                _tboard.log_value("dev/loss/ssim", _ssim_loss, step=epoch)
                _tboard.log_value("dev/loss/sobel", _sobel_loss, step=epoch)

                # Save best model.
                if _dev_loss < best_dev_loss:
                    save_path = saver.save(
                        sess,
                        os.path.join(weights_path, "{}.ckpt".format(epoch)))
                    _logger.info(
                        "Saved new best model to {}".format(save_path))
                    best_dev_loss = _dev_loss

                # Plot some reconstruction images
                _logger.info("Generating reconstruction plots...")
                _log_reconstruction_imgs("eval", sess, train_dataset, inp, out,
                                         epoch, mean, std)
                _log_reconstruction_imgs("train", sess, train_dataset, inp,
                                         out, epoch, mean, std)
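
A hypothetical invocation of the autoencoder training function above; the dataset name and hyperparameters are illustrative only.

train(dataset="scans_64_standardized",
      epochs=50,
      batch_size=8,
      buffer_size=4,
      lr=1e-4,
      l2_reg=1e-5,
      tv_reg=0.0,
      ssim_loss=0.1,
      sobel_loss=0.0)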
Example #20
                    type=str,
                    default='checkpoints',
                    help='save model directory')
opt = parser.parse_args()
print(opt)

logging.basicConfig(
    level=logging.INFO,  # minimum log level to print
    format='%(asctime)s: %(message)s',  # show timestamp and message
    stream=sys.stdout  # send log output to stdout
)

cudnn.benchmark = True

logging.info('=========== Starting Training ============')
train_data, test_data, char_to_index, index_to_char, n_class = get_dataset(opt)

net = Attention_ocr(use_gpu=opt.use_gpu, NUM_CLASS=n_class)

optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr, betas=(0.9, 0.999))
criterion = losses.Attention_loss()

net = torch.nn.DataParallel(net)
net = net.cuda()
model = Train_Engine(net)
model.fit(index_to_char,
          train_data=train_data,
          test_data=test_data,
          optimizer=optimizer,
          criterion=criterion,
          epochs=opt.epochs,