Python coo_matrix_to_hdf5 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: tg.utils

메소드/함수: coo_matrix_to_hdf5

hotexamples.com에서의 예제들: 2

Python coo_matrix_to_hdf5 - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python tg.utils.coo_matrix_to_hdf5의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

예제 #1

파일 보기

def extend_samples(samp_hdf_fname, tdict_pkl_fname, reverse_tdict_pkl_fname,
                   ext_hdf_fname, max_samp=None):
    """
    Write an "extended" copy of a samples HDF5 file.

    The source file is opened read-only and an extension matrix is obtained
    from make_extension_matrix(). The "vocab" dataset is copied unchanged to
    the new file. Every "samples/<lemma>/<pos>" group is read as a sparse
    matrix, right-multiplied by the extension matrix, and stored under the
    same group path in the new file (COO layout, data_dtype "i1", gzip).

    Both files are closed even when an error occurs part-way through, so no
    HDF5 handle is leaked; the exception itself still propagates.

    @param samp_hdf_fname: path of the existing samples HDF5 file (read)
    @param tdict_pkl_fname: passed through to make_extension_matrix()
    @param reverse_tdict_pkl_fname: passed through to make_extension_matrix()
    @param ext_hdf_fname: path of the HDF5 file to create (opened with "w")
    @param max_samp: stop after this many lemma/pos groups have been
        written; with the default None every group is processed
    """
    log.info("opening original samples file " + samp_hdf_fname)
    samp_hdfile = h5py.File(samp_hdf_fname, "r")
    # Stays None until the output file has actually been opened, so the
    # cleanup code below knows whether there is anything to close.
    ext_hdfile = None

    try:
        ext_mat = make_extension_matrix(samp_hdfile, tdict_pkl_fname,
                                        reverse_tdict_pkl_fname)

        log.info("creating extended samples file " + ext_hdf_fname)
        ext_hdfile = h5py.File(ext_hdf_fname, "w")
        # Created before the loop, so the "samples" group exists in the
        # output even when the source contains no lemma/pos groups.
        ext_hdfile.create_group("samples")

        log.info("copying vocabulary ({0} terms)".format(len(samp_hdfile["vocab"])))
        ext_hdfile.create_dataset("vocab", data=samp_hdfile["vocab"])

        # Lazily flatten the lemma -> pos nesting so that a single break
        # is enough to stop once max_samp groups have been written.
        pos_groups = ((lemma, pos, pos_group)
                      for lemma, lemma_group in samp_hdfile["samples"].iteritems()
                      for pos, pos_group in lemma_group.iteritems())

        for i, (lemma, pos, pos_group) in enumerate(pos_groups):
            log.info(u"{0}: creating extended samples for {1}/{2}".format(i, lemma, pos))
            samp_mat = coo_matrix_from_hdf5(pos_group).tocsr()
            mat = (samp_mat * ext_mat).tocoo()
            group = ext_hdfile.create_group(u"samples/{0}/{1}".format(lemma, pos))
            coo_matrix_to_hdf5(mat, group, data_dtype="i1", compression="gzip")

            # i is zero-based, so i + 1 groups have been written so far.
            if i + 1 == max_samp:
                log.info("reached maximum number of samples")
                break
    finally:
        # Close in the same order as before (source first, then output);
        # the inner finally guarantees the output file is closed even if
        # closing the source file fails.
        try:
            log.info("closing " + samp_hdf_fname)
            samp_hdfile.close()
        finally:
            if ext_hdfile is not None:
                log.info("closing " + ext_hdf_fname)
                ext_hdfile.close()

예제 #2

파일 보기

def make_new_samples(sample_hdfile, filtered_hdfile, columns_selector):
    """
    Copy every samples/<lemma>/<pos> matrix into filtered_hdfile, keeping
    only the columns picked by columns_selector and dropping the rows that
    are left without any stored entry.

    @param sample_hdfile: open HDF5 file containing a "samples" group
    @param filtered_hdfile: open, writable HDF5 file; a new "samples" group
        is created in it
    @param columns_selector: column index expression applied to each sample
        matrix (columns of the filtered vocabulary)
    """
    source_samples = sample_hdfile["samples"]
    target_samples = filtered_hdfile.create_group("samples")

    for lemma, lemma_group in source_samples.iteritems():
        for pos, pos_group in lemma_group.iteritems():
            lempos = u"/".join((lemma, pos))
            log.info("adding filtered samples for " + lempos)

            # column slicing is done on the CSC form; columns outside the
            # selector are discarded here
            by_column = coo_matrix_from_hdf5(pos_group).tocsc()
            as_lil = by_column[:, columns_selector].tolil()

            # the LIL form keeps one list per row; rows whose list is
            # non-empty are the ones to keep
            kept_rows = as_lil.rows.nonzero()[0]

            # row slicing is done on the CSR form, then back to COO for
            # storage
            filtered_mat = as_lil.tocsr()[kept_rows].tocoo()

            coo_matrix_to_hdf5(filtered_mat,
                               target_samples.create_group(lempos),
                               data_dtype="=i1",
                               compression="gzip")