# Example #1
def train(args, logdir1, logdir2):
    """Train Net2, restoring Net2's own checkpoint plus Net1's weights.

    Args:
        args: parsed CLI arguments; uses `args.ckpt` (optional checkpoint
            name inside logdir2) and `args.gpu` (comma-separated GPU ids).
        logdir1: directory holding Net1 checkpoints.
        logdir2: directory for Net2 checkpoints and event logs.
    """
    model = Net2()

    # Training data pipeline for Net2.
    df = Net2DataFlow(hp.train2.data_path, hp.train2.batch_size)

    # Route event logs and saved models to the Net2 log directory.
    logger.set_logger_dir(logdir2)

    session_conf = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.6,
        ),
    )

    # Restore Net2 first (an explicit --ckpt wins over the latest checkpoint),
    # then layer Net1's weights on top, skipping its global_step.
    if args.ckpt:
        ckpt2 = '{}/{}'.format(logdir2, args.ckpt)
    else:
        ckpt2 = tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))

    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=hp.train2.steps_per_epoch,
        session_init=ChainInit(session_inits),
        session_config=session_conf,
    )

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    launch_train_with_config(train_conf, trainer=SimpleTrainer())
# Example #2
def train(args, logdir):
    """Train Net1, optionally resuming from a checkpoint in *logdir*.

    Args:
        args: parsed CLI arguments; uses `args.ckpt` (optional checkpoint
            name inside logdir) and `args.gpu` (comma-separated GPU ids).
        logdir: directory for checkpoints and event logs.
    """
    print("####model")
    model = Net1()

    # Training data pipeline for Net1.
    print("####dataflow")
    df = Net1DataFlow(hp.Train1.data_path, hp.Train1.batch_size)

    # Route event logs and saved models to logdir.
    print("####logger")
    logger.set_logger_dir(logdir)

    print("####session_conf")
    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True),
        allow_soft_placement=True,
    )

    print("####train_conf")
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=5)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.Train1.num_epochs,
        steps_per_epoch=hp.Train1.steps_per_epoch,
        session_config=session_conf,
    )

    # Resume: an explicit --ckpt wins over the latest checkpoint in logdir.
    print("####ckpt")
    if args.ckpt:
        ckpt = '{}/{}'.format(logdir, args.ckpt)
    else:
        ckpt = tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    print("####trainer")
    trainer = SyncMultiGPUTrainerReplicated(hp.Train1.num_gpu)

    print("####launch_train_with_config")
    launch_train_with_config(train_conf, trainer=trainer)
# Example #3
def train(args, logdir):
    """Train Net, optionally resuming from a checkpoint in *logdir*.

    Args:
        args: parsed CLI arguments; uses `args.ckpt` (optional checkpoint
            name inside logdir) and `args.gpu` (comma-separated GPU ids).
        logdir: directory for checkpoints and event logs.
    """
    model = Net()

    # Training data pipeline.
    df = NetDataFlow(hp.train.data_path, hp.train.batch_size)

    # Route event logs and saved models to logdir.
    logger.set_logger_dir(logdir)

    # NOTE(review): session_conf is constructed but never handed to
    # TrainConfig (its session_config argument is left out below), so these
    # GPU settings currently have no effect — confirm whether that is intended.
    session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    # Cap GPU memory usage at 45% of the device.
    session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45

    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.train.num_epochs,
        steps_per_epoch=hp.train.steps_per_epoch,
    )

    # Resume: an explicit --ckpt wins over the latest checkpoint in logdir.
    if args.ckpt:
        ckpt = '{}/{}'.format(logdir, args.ckpt)
    else:
        ckpt = tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    trainer = SyncMultiGPUTrainerReplicated(hp.train.num_gpu)

    launch_train_with_config(train_conf, trainer=trainer)
def train(args, logdir2):
    """Train Net2, restoring from a checkpoint in *logdir2* when available.

    Args:
        args: parsed CLI arguments; uses `args.ckpt` (optional checkpoint
            name inside logdir2) and `args.gpu` (comma-separated GPU ids).
        logdir2: directory holding Net2 checkpoints.
    """
    model = Net2()

    # Training data pipeline for Net2 (mel spectrograms + PPGs).
    df = Net2DataFlow(hp.train2.mel_path, hp.train2.ppgs_path,
                      hp.train2.batch_size)

    # Restore Net2 weights: an explicit --ckpt wins over the latest checkpoint.
    session_inits = []
    ckpt2 = '{}/{}'.format(
        logdir2,
        args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=hp.train2.steps_per_epoch,
        session_init=ChainInit(session_inits))
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    trainer = SyncMultiGPUTrainerReplicated(hp.train2.num_gpu)
    print("started trainer")  # fixed typo: message previously read "strated"
    launch_train_with_config(train_conf, trainer=trainer)
# Example #5
    # dataset
    dataset = Dataset(hp.data_path,
                      hp.train.batch_size,
                      length=hp.signal.length)
    print('dataset size is {}'.format(len(dataset.wav_files)))

    # set logger for event and model saver
    logger.set_logger_dir(hp.logdir)

    train_conf = TrainConfig(
        model=model,
        data=TFDatasetInput(dataset()),
        callbacks=[
            ModelSaver(checkpoint_dir=hp.logdir),
            RunUpdateOps()  # for batch norm, exponential moving average
            # TODO GenerateCallback()
        ],
        max_epoch=hp.train.num_epochs,
        steps_per_epoch=hp.train.steps_per_epoch,
    )
    ckpt = '{}/{}'.format(
        hp.logdir, ckpt) if ckpt else tf.train.latest_checkpoint(hp.logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, gpu))
        train_conf.nr_tower = len(gpu)

    if hp.train.num_gpu <= 1:
# Example #6
                                  hp.train.tar_labels,
                                  hp.train.ntar_labels,
                                  length=hp.signal.length,
                                  tar_ratio=hp.train.tar_ratio)

        # set logger for event and model saver
        logger.set_logger_dir(hp.logdir)

        model = globals()[hp.model]()
        print("Model name: {}".format(hp.model))
        train_conf = TrainConfig(
            model=model,
            data=TFDatasetInput(dataset.train.get_dataset()),
            callbacks=[
                ModelSaver(checkpoint_dir=hp.logdir),
                EvalCallback(),
                # RunUpdateOps()  # enable this when using batch normalization.
            ],
            max_epoch=hp.train.num_epochs,
            steps_per_epoch=hp.train.steps_per_epoch,
        )
        ckpt = '{}/{}'.format(
            hp.logdir, ckpt) if ckpt else tf.train.latest_checkpoint(hp.logdir)
        if ckpt:
            train_conf.session_init = SaverRestore(ckpt)

        if gpu is not None:
            os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, gpu))
            train_conf.nr_tower = len(gpu)

        if hp.train.num_gpu <= 1:
# Example #7
    # dataflow
    audio_meta = AudioMeta(hp.train.data_path)
    if args.remote:
        df = get_remote_dataflow(args.port, hp.train.batch_size)
    else:
        df = DataLoader(audio_meta, hp.train.batch_size).dataflow(nr_prefetch=5000, nr_thread=int(multiprocessing.cpu_count() // 1.5))

    # set logger for event and model saver
    logger.set_logger_dir(hp.logdir)
    if True:
        train_conf = TrainConfig(
            model=ClassificationModel(num_classes=audio_meta.num_speaker, **hp.model),
            data=FlexibleQueueInput(df, capacity=500),
            callbacks=[
                ModelSaver(checkpoint_dir=hp.logdir),
                EvalCallback()
            ],
            steps_per_epoch=hp.train.steps_per_epoch,
            # session_config=session_config
        )

        ckpt = args.ckpt if args.ckpt else tf.train.latest_checkpoint(hp.logdir)
        if ckpt and not args.r:
            train_conf.session_init = SaverRestore(ckpt)

        if args.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
            train_conf.nr_tower = len(args.gpu.split(','))

        trainer = SyncMultiGPUTrainerReplicated(hp.train.num_gpu)