Esempio n. 1
0
    def load_data(self, split='train', load_mel_only=False):
        """Log where the data comes from and attach a loader as `self.dataloader`.

        Args:
            split: 'train' or 'test'.
            load_mel_only: when true, the 'acoustic' loader is always used and
                `run_mockingjay` is passed as False.

        Raises:
            NotImplementedError: if `split` is neither 'train' nor 'test'.
        """
        # Reject unknown splits before touching any instance state.
        if split not in ('train', 'test'):
            raise NotImplementedError('Invalid `split` argument!')

        loader_cfg = self.config['dataloader']
        if split == 'train':
            train_set = str(loader_cfg['train_set'])
            self.verbose('Loading source data ' + train_set +
                         ' from ' + loader_cfg['data_path'])
            if self.duo_feature:
                self.verbose('Loading target data ' + train_set +
                             ' from ' + loader_cfg['target_path'])
        else:
            self.verbose('Loading testing data ' + str(loader_cfg['test_set']) +
                         ' from ' + loader_cfg['data_path'])

        if self.duo_feature and not load_mel_only:
            # Currently the duo feature dataloader only supports mockingjay
            # training, so `run_mockingjay` is not specified here.
            loader = get_Dataloader(split,
                                    load='duo',
                                    use_gpu=self.paras.gpu,
                                    mock_config=self.config['mockingjay'],
                                    **self.config['dataloader'])
        else:
            # `run_mockingjay` is specified so the dataloader will process
            # mockingjay MAM data (disabled when only mel features are wanted).
            loader = get_Dataloader(split,
                                    load='acoustic',
                                    use_gpu=self.paras.gpu,
                                    run_mockingjay=not load_mel_only,
                                    mock_config=self.config['mockingjay'],
                                    **self.config['dataloader'])
        self.dataloader = loader
Esempio n. 2
0
def get_dataloader(args, dataloader_config):
    """Build the train, dev and test dataloaders for a downstream task.

    The checkpoint at `args.ckpt` is loaded on CPU; if its stored config has an
    'online' section, that section is written into
    `dataloader_config['online_config']` (the passed dict is modified in place).

    Args:
        args: namespace providing `ckpt`, `task`, `gpu` and `seed`.
        dataloader_config: keyword arguments forwarded to `get_Dataloader`;
            'data_path', 'train_set' and (for non-speaker tasks) 'phone_path'
            are also read here.

    Returns:
        A `(train_loader, dev_loader, test_loader)` tuple.

    Raises:
        RuntimeError: if 'data_path' (or 'phone_path' for non-speaker tasks)
            does not exist.
    """
    checkpoint = torch.load(args.ckpt, map_location='cpu')
    pretrain_config = checkpoint['Settings']['Config']
    if 'online' in pretrain_config:
        dataloader_config['online_config'] = pretrain_config['online']

    data_path = dataloader_config['data_path']
    if not os.path.exists(data_path):
        raise RuntimeError('[run_downstream] - Data path not valid:', data_path)
    train_set = str(dataloader_config['train_set'])
    print('[run_downstream] - Loading input data: ' + train_set + ' from ' + data_path)

    if args.task == 'speaker':
        print('[run_downstream] - Loading speaker data: ' + train_set + ' from ' + data_path)
    else:
        phone_path = dataloader_config['phone_path']
        # The path is announced before it is validated.
        print('[run_downstream] - Loading phone data: ' + phone_path)
        if not os.path.exists(phone_path):
            raise RuntimeError('[run_downstream] - Phone path not valid:', phone_path)
        if args.task == 'montreal_phone':
            print("[run_downstream] - WARNING: Using a non-preset phone set! Please make sure 'data_path' (should be: data/libri_mel160_subword5000) and 'phone_path' (should be: data/libri_phone) are set correctly.")

    def build(split):
        # All three loaders share every argument except the split name.
        return get_Dataloader(split=split, load=args.task, use_gpu=args.gpu,
                              seed=args.seed, **dataloader_config)

    print('[run_downstream] - getting train dataloader...')
    train_loader = build('train')

    print('[run_downstream] - getting dev dataloader...')
    dev_loader = build('dev')

    print('[run_downstream] - getting test dataloader...')
    test_loader = build('test')

    return train_loader, dev_loader, test_loader
def get_dataloader(args, dataloader_config):
    """Build the train, dev and test dataloaders for a downstream task.

    Args:
        args: namespace providing `task` and `gpu`.
        dataloader_config: keyword arguments forwarded to `get_Dataloader`;
            'data_path', 'train_set' and (for non-speaker tasks) 'phone_path'
            are also read here for logging.

    Returns:
        A `(train_loader, dev_loader, test_loader)` tuple.

    Raises:
        RuntimeError: if 'data_path' does not exist.
    """
    data_path = dataloader_config['data_path']
    if not os.path.exists(data_path):
        raise RuntimeError('[run_downstream] - Data path not valid:', data_path)

    train_set = str(dataloader_config['train_set'])
    print('[run_downstream] - Loading input data: ' + train_set + ' from ' + data_path)
    if args.task == 'speaker':
        print('[run_downstream] - Loading speaker data: ' + train_set + ' from ' + data_path)
    else:
        print('[run_downstream] - Loading phone data: ' + dataloader_config['phone_path'])

    def build(split):
        # All three loaders share every argument except the split name.
        return get_Dataloader(split=split, load=args.task, use_gpu=args.gpu,
                              **dataloader_config)

    print('[run_downstream] - getting train dataloader...')
    train_loader = build('train')

    print('[run_downstream] - getting dev dataloader...')
    dev_loader = build('dev')

    print('[run_downstream] - getting test dataloader...')
    test_loader = build('test')

    return train_loader, dev_loader, test_loader
 def load_data(self):
     ''' Load data for training/validation'''
     solver_cfg = self.config['solver']
     self.verbose('Loading data from ' + solver_cfg['data_path'])
     self.train_set = get_Dataloader('train', load='asr',
                                     use_gpu=self.paras.gpu, **solver_cfg)
     self.dev_set = get_Dataloader('dev', load='asr',
                                   use_gpu=self.paras.gpu, **solver_cfg)

     # Grab the first training batch only; it is used as a sample input for
     # auto constructing the model.
     for first_batch in self.train_set:
         self.sample_x, _ = first_batch
         break
     # A 4-d sample is reduced to its first entry along the leading axis.
     if len(self.sample_x.shape) == 4:
         self.sample_x = self.sample_x[0]
Esempio n. 5
0
 def load_data(self):
     ''' Load the test and dev sets as `self.test_set` / `self.dev_set`'''
     solver_cfg = self.config['solver']
     self.verbose('Loading testing data ' + str(solver_cfg['test_set']) +
                  ' from ' + solver_cfg['data_path'])
     # Both loaders take the same arguments apart from the split name.
     for attr_name, split in (('test_set', 'test'), ('dev_set', 'dev')):
         loader = get_Dataloader(split, load='asr', use_gpu=self.paras.gpu,
                                 **self.config['solver'])
         setattr(self, attr_name, loader)
Esempio n. 6
0
def get_dataloader(args, config):
    """Build the training dataloader for an upstream run.

    The loading mode is chosen from the config:

    * `transformer.wave_transformer` truthy  -> 'wave_acoustic'
    * `transformer.dual_transformer` truthy  -> 'dual_acoustic'
    * `wave_transformer` present but falsy   -> 'acoustic'
    * otherwise -> 'duo' if `runner.duo_feature`, else 'kaldi' if
      `args.kaldi_data`, else 'acoustic'

    Args:
        args: namespace providing `kaldi_data` and `gpu`.
        config: full configuration mapping. The 'dataloader', 'transformer'
            and 'runner' sections are read, plus 'online' for log messages
            when applicable. Both `config['dataloader']` and `config` itself
            are expanded as keyword arguments to `get_Dataloader`.

    Returns:
        The object returned by `get_Dataloader` for the 'train' split.

    Raises:
        RuntimeError: if `config['dataloader']['data_path']` does not exist.
        ValueError: if `dual_transformer` and `wave_transformer` are both
            enabled.
    """
    loader_cfg = config['dataloader']
    if not os.path.exists(loader_cfg['data_path']):
        raise RuntimeError('[run_upstream] - Data path not valid:', loader_cfg['data_path'])
    print('[run_upstream] - Loading input data: ' + str(loader_cfg['train_set']) + ' from ' + loader_cfg['data_path'])
    print('[run_upstream] - getting train dataloader...')

    # select mode
    transformer_cfg = config['transformer']
    # Missing keys count as "disabled"; this replaces a blanket try/except
    # that also swallowed the ValueError it was meant to raise.
    use_dual = bool(transformer_cfg.get('dual_transformer', False))
    use_wave = bool(transformer_cfg.get('wave_transformer', False))
    if use_dual and use_wave:
        raise ValueError('`dual_transformer` and `wave_transformer` can not both be True!')

    if use_wave:
        load = 'wave_acoustic'
    elif use_dual:
        # Previously this choice was always overwritten by the
        # `wave_transformer` branch or its `else`.
        load = 'dual_acoustic'
    elif 'wave_transformer' in transformer_cfg:
        # An explicit `wave_transformer: False` selects plain acoustic loading.
        load = 'acoustic'
    else:
        load = 'duo' if bool(config['runner']['duo_feature']) else 'kaldi' if args.kaldi_data else 'acoustic'

    # print path info
    if load == 'duo':
        print('[run_upstream] - Loading duo data: ' + str(loader_cfg['train_set']) + ' from ' + loader_cfg['target_path'])
    elif load == 'kaldi':
        print('[run_upstream] - Loading Kaldi data: ' + str(loader_cfg['data_path']) + ' from these sets ' + str(loader_cfg['train_set']))
    elif load == 'wave_acoustic':
        print('[run_upstream] - Loading wave data: ' + str(config['online']['libri_root']) + ' from these sets ' + str(loader_cfg['train_set']))
    elif load == 'acoustic' and 'online' in config:
        print('[run_upstream] - Using online data from root: ' + str(config['online']['libri_root']))
    elif load == 'acoustic':
        print('[run_upstream] - Loading data: ' + str(loader_cfg['data_path']) + ' from these sets ' + str(loader_cfg['train_set']))

    dataloader = get_Dataloader(split='train', load=load, use_gpu=args.gpu,
                                run_mam=True, mam_config=config['transformer'], **config['dataloader'], **config)

    return dataloader
Esempio n. 7
0
 def load_data(self):
     ''' Load the text training / dev sets as `self.train_set` / `self.dev_set`'''
     self.verbose('Loading text data from ' +
                  self.config['solver']['data_path'])
     # Both loaders take the same arguments apart from the split name.
     for attr_name, split in (('train_set', 'train'), ('dev_set', 'dev')):
         loader = get_Dataloader(split, load='text', use_gpu=self.paras.gpu,
                                 **self.config['solver'])
         setattr(self, attr_name, loader)
Esempio n. 8
0
 def load_data(self, split='train'):
     ''' Log the data source and attach a 'spec' loader as `self.dataloader`.

     Raises NotImplementedError when `split` is neither 'train' nor 'test'.
     '''
     # Reject unknown splits before touching any instance state.
     if split not in ('train', 'test'):
         raise NotImplementedError('Invalid `split` argument!')

     if split == 'train':
         self.verbose('Loading source data from ' +
                      str(self.config.train_set) + ' from ' +
                      self.config.data_path)
     else:
         self.verbose('Loading testing data ' + str(self.config.test_set) +
                      ' from ' + self.config.data_path)

     cfg = self.config
     # Fixed limits and `use_gpu=True` are hard-coded for this loader.
     loader_kwargs = dict(
         load='spec',
         data_path=cfg.data_path,
         batch_size=cfg.batch_size,
         max_timestep=3000,
         max_label_len=400,
         use_gpu=True,
         n_jobs=cfg.load_data_workers,
         train_set=cfg.train_set,
         dev_set=cfg.dev_set,
         test_set=cfg.test_set,
         dev_batch_size=1,
     )
     self.dataloader = get_Dataloader(split, **loader_kwargs)
Esempio n. 9
0
    def load_data(self, split='train', load='phone'):
        """Log the data sources for a downstream task and attach `self.dataloader`.

        Args:
            split: 'train' or 'test' for the phone / speaker tasks; for
                'sentiment' the value is only echoed in the log message.
            load: one of 'phone', 'cpc_phone', 'sentiment', 'speaker',
                'speaker_large'.

        Raises:
            AssertionError: if `load` is not one of the supported names.
            NotImplementedError: if `split` is invalid for the chosen task.
        """
        assert(load in ['phone', 'cpc_phone', 'sentiment', 'speaker', 'speaker_large']), 'Unsupported dataloader!'

        if load == 'sentiment':
            sentiment_cfg = self.config['dataloader']['sentiment_config']
            target = sentiment_cfg['dataset']
            sentiment_path = sentiment_cfg[target]['path']
            self.verbose(f'Loading {split} data from {sentiment_path}')
        elif load in ('phone', 'cpc_phone', 'speaker_large', 'speaker'):
            # Reject unknown splits before touching any instance state.
            if split not in ('train', 'test'):
                raise NotImplementedError('Invalid `split` argument!')
            loader_cfg = self.config['dataloader']
            training = split == 'train'

            if load == 'speaker':
                # The speaker task logs the set names with '360' replaced by '100'.
                if training:
                    self.verbose('Loading source data from ' + str(loader_cfg['train_set']).replace('360', '100') + ' from ' + loader_cfg['data_path'])
                else:
                    self.verbose('Loading testing data ' + str(loader_cfg['test_set']).replace('360', '100') + ' from ' + loader_cfg['data_path'])
            elif training:
                self.verbose('Loading source data from ' + str(loader_cfg['train_set']) + ' from ' + loader_cfg['data_path'])
                # Only the phone-based tasks announce a label source.
                if load != 'speaker_large':
                    self.verbose('Loading phone data from ' + str(loader_cfg['train_set']) + ' from ' + loader_cfg['phone_path'])
            else:
                if load != 'cpc_phone':
                    self.verbose('Loading testing data ' + str(loader_cfg['test_set']) + ' from ' + loader_cfg['data_path'])
                if load == 'phone':
                    self.verbose('Loading label data ' + str(loader_cfg['test_set']) + ' from ' + loader_cfg['phone_path'])
                elif load == 'cpc_phone':
                    self.verbose('Loading label data from ' + loader_cfg['phone_path'])
        else:
            raise NotImplementedError('Unsupported downstream tasks.')

        self.dataloader = get_Dataloader(split,
                                         load=load,
                                         use_gpu=self.paras.gpu,
                                         run_mockingjay=self.run_mockingjay,
                                         mock_config=self.config['mockingjay'],
                                         **self.config['dataloader'])
Esempio n. 10
0
def get_dataloader(args, config):
    """Build the training dataloader for an upstream run.

    The loading mode is 'duo' when `config['runner']['duo_feature']` is truthy,
    otherwise 'kaldi' when `args.kaldi_data` is truthy, otherwise 'acoustic'.

    Args:
        args: namespace providing `kaldi_data` and `gpu`.
        config: configuration mapping; the 'dataloader', 'runner' and
            'transformer' sections are read.

    Returns:
        The object returned by `get_Dataloader` for the 'train' split.

    Raises:
        RuntimeError: if `config['dataloader']['data_path']` does not exist.
    """
    loader_cfg = config['dataloader']
    data_path = loader_cfg['data_path']
    if not os.path.exists(data_path):
        raise RuntimeError('[run_upstream] - Data path not valid:', data_path)

    train_set = str(loader_cfg['train_set'])
    print('[run_upstream] - Loading input data: ' + train_set + ' from ' + data_path)
    print('[run_upstream] - getting train dataloader...')

    if bool(config['runner']['duo_feature']):
        load = 'duo'
        print('[run_upstream] - Loading duo data: ' + train_set + ' from ' +
              loader_cfg['target_path'])
    elif args.kaldi_data:
        load = 'kaldi'
        print('[run_upstream] - Loading Kaldi data: ' + str(data_path) +
              ' from these sets ' + train_set)
    else:
        load = 'acoustic'

    return get_Dataloader(split='train',
                          load=load,
                          use_gpu=args.gpu,
                          run_mam=True,
                          mam_config=config['transformer'],
                          **config['dataloader'])
Esempio n. 11
0
    def load_data(self, split='train'):
        """Log where the data comes from and attach a loader as `self.dataloader`.

        Args:
            split: 'train' or 'test'.

        Raises:
            NotImplementedError: if `split` is neither 'train' nor 'test'.
        """
        # Reject unknown splits before touching any instance state.
        if split not in ('train', 'test'):
            raise NotImplementedError('Invalid `split` argument!')

        loader_cfg = self.config['dataloader']
        if split == 'train':
            train_set = str(loader_cfg['train_set'])
            self.verbose('Loading source data ' + train_set +
                         ' from ' + loader_cfg['data_path'])
            if self.duo_feature:
                self.verbose('Loading target data ' + train_set +
                             ' from ' + loader_cfg['target_path'])
        else:
            self.verbose('Loading testing data ' + str(loader_cfg['test_set']) +
                         ' from ' + loader_cfg['data_path'])

        if self.duo_feature:
            # run_mam is automatically performed for the duo loader.
            loader = get_Dataloader(split,
                                    load='duo',
                                    use_gpu=self.paras.gpu,
                                    mam_config=self.transformer_config,
                                    **self.config['dataloader'])
        else:
            loader = get_Dataloader(split,
                                    load='acoustic',
                                    use_gpu=self.paras.gpu,
                                    run_mam=True,
                                    mam_config=self.transformer_config,
                                    **self.config['dataloader'])
        self.dataloader = loader
Esempio n. 12
0
 def load_text(self, data_config):
     ''' Create the text-only loader and an iterator over it.'''
     # Independent training set for CLM
     text_loader = get_Dataloader('text', text_only=True, **data_config)
     self.train_set = text_loader
     # Keep a live iterator over the loader on the instance.
     self.data_iter = iter(self.train_set)