예제 #1
0
def train(opts):
    """Seed the RNGs, build loaders and trainer from ``opts``, then train.

    Reads from ``opts``: ``no_cuda``, ``seed``, ``net_cfg``, ``random_scale``,
    ``batch_size``, ``num_workers``, ``chunk_size``, ``do_eval``, ``fe_cfg``,
    ``att_cfg``, ``tensorboard``, ``backprop_mode`` and ``lr_mode``.

    Writes back to ``opts``: ``random_scale`` (converted with ``str2bool``),
    ``bpe`` and, when ``opts.do_eval`` is truthy, ``va_bpe``.

    Raises:
        AssertionError: if validation is requested but no validation
            dataset was built.
    """
    use_cuda = bool(torch.cuda.is_available() and not opts.no_cuda)

    # Seed every RNG with the same value, in the same order as before.
    for seed_fn in (np.random.seed, random.seed, torch.manual_seed):
        seed_fn(opts.seed)

    if use_cuda:
        device = 'cuda'
        torch.cuda.manual_seed_all(opts.seed)
        print('[*] Using CUDA {} devices'.format(torch.cuda.device_count()))
    else:
        device = 'cpu'
        print('[!] Using CPU')
    print('Seeds initialized to {}'.format(opts.seed))

    #torch.autograd.set_detect_anomaly(True)

    # ---------------------
    # Build Model

    workers_cfg = worker_parser(opts.net_cfg)
    #make_transforms(opts, minions_cfg)
    opts.random_scale = str2bool(opts.random_scale)

    (train_set, valid_set), extra_keys = build_dataset_providers(opts,
                                                                 workers_cfg)

    # The training collater batches the default keys plus the ones
    # contributed by the loaded transforms.
    train_collater = DictCollater()
    train_collater.batching_keys.extend(extra_keys)

    # Settings shared by the training and validation loaders.
    loader_kwargs = dict(batch_size=opts.batch_size,
                         shuffle=True,
                         num_workers=opts.num_workers,
                         drop_last=True,
                         pin_memory=use_cuda)
    train_loader = DataLoader(train_set, collate_fn=train_collater,
                              **loader_kwargs)

    # Batches-per-epoch estimate: chunks are sampled randomly, so an epoch
    # is declared once total_wav_dur // chunk_size chunks have been seen.
    opts.bpe = (train_set.total_wav_dur // opts.chunk_size) // opts.batch_size
    # NOTE(review): the 16000 divisor presumably is the sample rate in Hz.
    print ("Dataset has a total {} hours of training data".format(train_set.total_wav_dur/16000/3600.0))

    valid_loader = None
    if opts.do_eval:
        assert valid_set is not None, (
            "Asked to do validation, but failed to build validation set"
        )
        # Validation uses a fresh collater with only the default keys.
        valid_loader = DataLoader(valid_set, collate_fn=DictCollater(),
                                  **loader_kwargs)
        opts.va_bpe = (
            (valid_set.total_wav_dur // opts.chunk_size) // opts.batch_size
        )

    # Disabled override kept from the original source:
    #opts.min_lrs = {'mi':0.001}

    # Optional JSON config for the frontend.
    if opts.fe_cfg is None:
        frontend_cfg = None
    else:
        with open(opts.fe_cfg, 'r') as cfg_file:
            print(cfg_file)
            frontend_cfg = json.load(cfg_file)
            print(frontend_cfg)

    # Optional JSON config for the attention blocks.
    if not opts.att_cfg:
        attention_cfg = None
    else:
        with open(opts.att_cfg) as cfg_file:
            attention_cfg = json.load(cfg_file)
            print(attention_cfg)

    print(str2bool(opts.tensorboard))
    trainer_kwargs = {
        'frontend_cfg': frontend_cfg,
        'att_cfg': attention_cfg,
        'minions_cfg': workers_cfg,
        'cfg': vars(opts),
        'backprop_mode': opts.backprop_mode,
        'lr_mode': opts.lr_mode,
        'tensorboard': str2bool(opts.tensorboard),
        'device': device,
    }
    pase_trainer = trainer(**trainer_kwargs)
    net = pase_trainer.model
    print(net)
    print('Frontend params: ', net.frontend.describe_params())

    net.to(device)

    pase_trainer.train_(train_loader, device=device,
                        valid_dataloader=valid_loader)
예제 #2
0
파일: train.py 프로젝트: EdwardDixon/pase
def train(opts):
    """Seed the RNGs, build loaders and trainer, set up Neptune, then train.

    Reads from ``opts``: ``no_cuda``, ``seed``, ``net_cfg``, ``random_scale``,
    ``batch_size``, ``num_workers``, ``chunk_size``, ``do_eval``, ``fe_cfg``,
    ``att_cfg``, ``tensorboard``, ``backprop_mode``, ``lr_mode``, ``neptune``,
    ``experimentname`` and ``tags``.

    Writes back to ``opts``: ``random_scale`` (converted with ``str2bool``),
    ``bpe`` and, when ``opts.do_eval`` is truthy, ``va_bpe``.

    When ``opts.neptune`` is not None it is opened as a JSON file providing
    the ``"project_name"`` and ``"api_key"`` entries used to initialise
    Neptune; otherwise Neptune is initialised with its offline backend.

    Raises:
        AssertionError: if validation is requested but no validation
            dataset was built.
    """
    CUDA = bool(torch.cuda.is_available() and not opts.no_cuda)
    device = 'cuda' if CUDA else 'cpu'
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
        num_devices = torch.cuda.device_count()
        print('[*] Using CUDA {} devices'.format(num_devices))
    else:
        print('[!] Using CPU')
    print('Seeds initialized to {}'.format(opts.seed))

    #torch.autograd.set_detect_anomaly(True)

    # ---------------------
    # Build Model

    minions_cfg = worker_parser(opts.net_cfg)
    #make_transforms(opts, minions_cfg)
    opts.random_scale = str2bool(opts.random_scale)

    dsets, collater_keys = build_dataset_providers(opts, minions_cfg)
    dset, va_dset = dsets
    # Build collater, appending the keys from the loaded transforms to the
    # existing default ones
    collater = DictCollater()
    collater.batching_keys.extend(collater_keys)
    dloader = DataLoader(dset,
                         batch_size=opts.batch_size,
                         shuffle=True,
                         collate_fn=collater,
                         num_workers=opts.num_workers,
                         drop_last=True,
                         pin_memory=CUDA)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // opts.batch_size
    # NOTE(review): the 16000 divisor presumably is the sample rate in Hz.
    print("Dataset has a total {} hours of training data".format(
        dset.total_wav_dur / 16000 / 3600.0))
    opts.bpe = bpe
    if opts.do_eval:
        assert va_dset is not None, (
            "Asked to do validation, but failed to build validation set")
        # Validation uses a fresh collater with only the default keys.
        va_dloader = DataLoader(va_dset,
                                batch_size=opts.batch_size,
                                shuffle=True,
                                collate_fn=DictCollater(),
                                num_workers=opts.num_workers,
                                drop_last=True,
                                pin_memory=CUDA)
        va_bpe = (va_dset.total_wav_dur // opts.chunk_size) // opts.batch_size
        opts.va_bpe = va_bpe
    else:
        va_dloader = None

    # Disabled override kept from the original source:
    #opts.min_lrs = {'mi':0.001}

    # Optional JSON config for the frontend.
    if opts.fe_cfg is not None:
        with open(opts.fe_cfg, 'r') as fe_cfg_f:
            print(fe_cfg_f)
            fe_cfg = json.load(fe_cfg_f)
            print(fe_cfg)
    else:
        fe_cfg = None

    # load config file for attention blocks
    if opts.att_cfg:
        with open(opts.att_cfg) as f:
            att_cfg = json.load(f)
            print(att_cfg)
    else:
        att_cfg = None

    print(str2bool(opts.tensorboard))
    Trainer = trainer(frontend_cfg=fe_cfg,
                      att_cfg=att_cfg,
                      minions_cfg=minions_cfg,
                      cfg=vars(opts),
                      backprop_mode=opts.backprop_mode,
                      lr_mode=opts.lr_mode,
                      tensorboard=str2bool(opts.tensorboard),
                      device=device)

    model_description = str(Trainer.model)
    print(model_description)
    num_params = Trainer.model.frontend.describe_params()
    print(f'Frontend params: {num_params}')

    # Prepare logging
    if opts.neptune is not None:
        # The with-block closes the settings file; no explicit close needed.
        with open(opts.neptune, "r") as fh:
            neptune_settings = json.load(fh)

        neptune.init(neptune_settings["project_name"],
                     api_token=neptune_settings["api_key"])
        neptune.create_experiment(params=vars(opts),
                                  name=opts.experimentname,
                                  tags=opts.tags)
    else:
        # running offline
        # NOTE(review): no experiment is created on this path; confirm the
        # logging calls below work with the offline backend without one.
        neptune.init(backend=neptune.OfflineBackend(),
                     project_qualified_name="offline/PASE+")

    # Write the model description to a temporary file only for the duration
    # of the upload; the context manager closes (and thereby removes) it even
    # if the upload raises.
    with tempfile.NamedTemporaryFile(mode="w") as tfh:
        tfh.write(model_description)
        # Flush so the content is on disk when it is read back by path.
        tfh.flush()
        neptune.log_artifact(tfh.name, "model_description.txt")
    neptune.set_property("frontend_params", num_params)

    Trainer.model.to(device)

    Trainer.train_(dloader, device=device, valid_dataloader=va_dloader)