Example #1
0
def get_data(**kwargs):
    ''' Returns data from audio tracks

    If the cache file DATAFILE_MAT exists, the dataset is loaded from it.
    Otherwise every group in '../tracks.h5' that contains a 'gfccs' dataset
    is read, the per-track arrays are stacked row-wise into a single matrix,
    and the result is saved to DATAFILE_MAT for later calls.

    Keyword args
    ------------
    Accepted for interface compatibility; none are read by this function.

    Returns
    -------
    Data : GroupXData
        Has attributes name and summary set. When built from the HDF5 file,
        doc_range[i]:doc_range[i+1] are the rows of X taken from the i-th
        track that had a 'gfccs' dataset.

    Raises
    ------
    ValueError
        If the HDF5 file contains no group with a 'gfccs' dataset.
    '''

    if os.path.exists(DATAFILE_MAT):
        Data = GroupXData.LoadFromFile(DATAFILE_MAT)
    else:
        obs = []
        doc_range = [0]
        count = 0
        with h5py.File('../tracks.h5', 'r') as tracks:
            for _, grp in ProgressBar(tracks.items()):
                if 'gfccs' not in grp:
                    continue
                data = grp['gfccs']
                # Running row total marks where this track ends in X.
                count += data.shape[0]
                doc_range.append(count)
                # Dataset.value was removed in h5py 3.0; indexing with ()
                # reads the full dataset and works on older versions too.
                obs.append(data[()].astype(np.float64))
        if not obs:
            # np.vstack would raise ValueError on an empty list anyway;
            # fail with a message that names the actual cause.
            raise ValueError(
                "No 'gfccs' datasets found in '../tracks.h5'")
        X = np.vstack(obs)
        Data = GroupXData(X=X, doc_range=doc_range)
        Data.save_to_mat(DATAFILE_MAT)
    Data.name = 'AudioCorpus'
    Data.summary = 'Audio Corpus. obs=10.5M docs=559'

    return Data
Example #2
0
    def loadDataForBatch(self, batchID):
        ''' Load the data assigned to a particular batch

        The file path is looked up in self.datafileList by batchID, and the
        loader is chosen from the file name and self.dataset_type:
        * path ending in '.ldac' : BagOfWordsData.LoadFromFile_ldac
        * dataset_type 'GroupXData' : GroupXData.LoadFromFile
        * anything else : XData.LoadFromFile
        In every case self.DataInfo is forwarded as keyword arguments.

        Args
        ----
        batchID : index into self.datafileList

        Returns
        -------
        Dchunk : bnpy.data.DataObj subclass
        '''
        dpath = self.datafileList[batchID]
        if dpath.endswith('.ldac'):
            return BagOfWordsData.LoadFromFile_ldac(dpath, **self.DataInfo)
        elif self.dataset_type == 'GroupXData':
            return GroupXData.LoadFromFile(dpath, **self.DataInfo)
        else:
            # Use the same LoadFromFile entry point as the other branches
            # and as loadDataForSlice, so both loaders dispatch identically.
            return XData.LoadFromFile(dpath, **self.DataInfo)
Example #3
0
def loadDataForSlice(filepath='', dataset_type='', **kwargs):
    """ Load one file from disk and return the resulting data object.

    The loader is selected from the file name and the requested type:
    a path ending in '.ldac' uses BagOfWordsData.LoadFromFile_ldac;
    otherwise dataset_type == 'GroupXData' uses GroupXData.LoadFromFile,
    and any other dataset_type uses XData.LoadFromFile.

    Keyword args
    ------------
    workerID
    nWorkers
    All keyword args are forwarded unchanged to the selected loader.
    """
    if filepath.endswith('.ldac'):
        loader = BagOfWordsData.LoadFromFile_ldac
    elif dataset_type == 'GroupXData':
        loader = GroupXData.LoadFromFile
    else:
        loader = XData.LoadFromFile
    return loader(filepath, **kwargs)