예제 #1
0
def main(species,
         window_size,
         motif_metadata,
         bin_sorted_hits,
         group_loci=100000):
    """Stream bin-sorted motif hits into a sparse binding dataset.

    Reads a tab-separated motif metadata file and a whitespace-delimited
    hits file (``motif_id bin_num score`` per line, sorted by ``bin_num``),
    then appends the hits to the DataInterface store as CSR matrix
    segments of roughly ``group_loci`` buffered rows each.

    Parameters
    ----------
    species : str
        Species identifier passed to DataInterface.
    window_size : int
        Genome bin size passed to DataInterface.
    motif_metadata : str
        Path to a headerless TSV with columns dataset_id, factor, source.
    bin_sorted_hits : str
        Path to the hits file, sorted ascending by bin number.
    group_loci : int, optional
        Minimum number of buffered rows before a matrix segment is
        flushed (default 100000).

    Raises
    ------
    Exception
        If the hits file is not sorted by bin number.
    """
    motif_metadata = pd.read_csv(motif_metadata, sep='\t', header=None)
    motif_metadata.columns = ['dataset_id', 'factor', 'source']
    motif_metadata = motif_metadata.set_index('dataset_id')
    motif_metadata = motif_metadata.drop_duplicates()

    data = DataInterface(species,
                         window_size=window_size,
                         download_if_not_exists=False,
                         make_new=False,
                         load_genes=False)

    # NOTE(review): removed leftover debug statements (``print(data.path)``
    # followed by a bare ``raise Exception()``) that made everything below
    # unreachable.

    data.create_binding_dataset(TECHNOLOGY, motif_metadata.index.values,
                                **motif_metadata.to_dict('list'))

    # Map dataset id -> column index in the binding matrix.
    id_to_idx_map = dict(
        zip(data.list_binding_datasets(TECHNOLOGY),
            np.arange(len(data.list_binding_datasets(TECHNOLOGY)))))

    current_pos = 0        # last bin number seen (sortedness check)
    last_added_chunk = 0   # bin offset of the current matrix segment
    i = 0                  # rows buffered since the last flush
    rows, cols, scores = [], [], []

    with open(bin_sorted_hits, 'r') as f:

        for line in f:
            motif_id, bin_num, score = line.strip().split()

            bin_num = int(bin_num)

            if bin_num < current_pos:
                raise Exception('Input file not sorted!')
            elif bin_num > current_pos and i >= group_loci:
                # Flush the buffered segment once we cross a bin boundary
                # with at least ``group_loci`` rows accumulated, so a bin's
                # rows are never split across two segments.
                print('Adding matrix segment ...')
                matrix_form = sparse.coo_matrix((scores, (rows, cols))).tocsr()
                data.append_csr(TECHNOLOGY, matrix_form)
                last_added_chunk = bin_num
                i = 0
                rows, cols, scores = [], [], []

            tf_idx = id_to_idx_map[motif_id]
            # Row indices are relative to the start of the current segment.
            rows.append(bin_num - last_added_chunk)
            cols.append(tf_idx)
            scores.append(int(score))
            current_pos = bin_num
            i += 1

        # Flush whatever remains after the last line.
        if len(rows) > 0:
            matrix_form = sparse.coo_matrix((scores, (rows, cols))).tocsr()
            data.append_csr(TECHNOLOGY, matrix_form)
예제 #2
0
def main(args):
    """Attach coverage profiles to the dataset, building RP maps on demand.

    Loads the cistrome metadata table, ensures the 'basic_10K' and
    'enhanced_10K' RP maps exist (building and persisting them if the
    store has none), then registers each coverage array in
    ``args.coverage_arrays`` together with its metadata.
    """
    cistrome_metadata = pd.read_csv(args.cistrome_metadata,
                                    sep='\t').set_index('DCid')
    cistrome_metadata.index = cistrome_metadata.index.astype(str)

    data = DataInterface(args.species,
                         window_size=args.window_size,
                         download_if_not_exists=False,
                         make_new=False,
                         load_genes=True)

    rp_map_styles = data.get_rp_maps()

    # Reuse stored RP maps when any exist; otherwise build and persist them.
    if len(rp_map_styles) > 0:
        basic_rp_map = data.get_rp_map('basic_10K')
        enhanced_rp_map = data.get_rp_map('enhanced_10K')
    else:
        basic_rp_map = data.build_binned_rp_map('basic', 10000)
        enhanced_rp_map = data.build_binned_rp_map('enhanced', 10000)
        data.add_rp_map('basic_10K', basic_rp_map)
        data.add_rp_map('enhanced_10K', enhanced_rp_map)

    for array_path in args.coverage_arrays:

        coverage_array = np.load(array_path)

        # File names follow ``<technology>_<dataset_id>.<ext>``; strip the
        # final extension to recover the dataset id.
        technology, dataset_file = os.path.basename(array_path).split('_')
        dataset_id = '.'.join(dataset_file.split('.')[:-1])

        headers = data.get_metadata_headers(technology)
        meta_dict = cistrome_metadata.loc[dataset_id, headers].to_dict()

        data.add_profile_data(technology, dataset_id, coverage_array,
                              [basic_rp_map, enhanced_rp_map],
                              ['basic_10K', 'enhanced_10K'], **meta_dict)
예제 #3
0
def main(species, window_size, cistrome_metadata, motif_metadata, index_files):
    """Create a 'Motif' binding dataset from the motif metadata table.

    Reads both metadata tables and registers a binding dataset keyed by
    the motif dataset ids.
    """
    # NOTE(review): the cistrome table is loaded (mirroring sibling
    # scripts) but is not used below.
    cistrome_metadata = (pd.read_csv(cistrome_metadata, sep='\t')
                         .set_index('DCid'))
    cistrome_metadata.index = cistrome_metadata.index.astype(str)

    motifs = (pd.read_csv(motif_metadata, sep='\t', header=None,
                          names=['dataset_id', 'factor', 'source'])
              .set_index('dataset_id')
              .drop_duplicates())

    data = DataInterface(species,
                         window_size=window_size,
                         download_if_not_exists=False,
                         make_new=False,
                         load_genes=False)

    data.create_binding_dataset('Motif', motifs.index.values)
예제 #4
0
def main(species, window_size, cistrome_metadata, motif_metadata, index_files):
    """Register per-dataset binding hits from bin-index files.

    Each index file is named ``<technology>_<dataset_id>.<ext>`` and holds
    one genomic bin number per line. Metadata for 'Motifs' datasets comes
    from the motif table (with source forced to 'jaspar'); all other
    technologies look up the cistrome table.
    """
    cistrome_meta = (pd.read_csv(cistrome_metadata, sep='\t')
                     .set_index('DCid'))
    cistrome_meta.index = cistrome_meta.index.astype(str)

    motif_meta = (pd.read_csv(motif_metadata, sep='\t', header=None,
                              names=['dataset_id', 'factor', 'source'])
                  .set_index('dataset_id')
                  .drop_duplicates())

    data = DataInterface(species,
                         window_size=window_size,
                         download_if_not_exists=False,
                         make_new=False,
                         load_genes=False)

    for index_path in index_files:

        with open(index_path, 'r') as handle:
            hit_bins = np.array([int(line.strip()) for line in handle])

        technology, remainder = os.path.basename(index_path).split('_')
        dataset_id = '.'.join(remainder.split('.')[:-1])

        headers = data.get_metadata_headers(technology)

        # NOTE(review): this branch tests 'Motifs' while another script in
        # this file creates datasets under 'Motif' — confirm which spelling
        # the file names actually use.
        if technology == 'Motifs':
            meta_dict = motif_meta.loc[dataset_id, headers].to_dict()
            meta_dict['source'] = 'jaspar'
        else:
            meta_dict = cistrome_meta.loc[dataset_id, headers].to_dict()

        data.add_binding_data(technology, dataset_id, hit_bins, **meta_dict)