Python build_transcript_cluster_map 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: chimerascan.lib.gene_to_genome

메소드/함수: build_transcript_cluster_map

hotexamples.com에서의 예제들: 2

Python build_transcript_cluster_map - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 chimerascan.lib.gene_to_genome.build_transcript_cluster_map에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: filter_chimeras.py 프로젝트: bioinfo-dirty-jobs/chimerascan-vrl

def get_highest_coverage_isoforms(input_file, gene_file):
    # place overlapping chimeras into clusters
    logging.debug("Building isoform cluster lookup table")
    transcript_cluster_map = build_transcript_cluster_map(open(gene_file))
    # build a lookup table to get genome coordinates from transcript 
    # coordinates
    transcript_genome_map = build_transcript_genome_map(open(gene_file))
    cluster_chimera_dict = collections.defaultdict(lambda: [])
    for c in Chimera.parse(open(input_file)):
        # TODO: adjust this to score chimeras differently!
        key = (c.name, c.get_num_frags())
        # get cluster of overlapping genes
        cluster5p = transcript_cluster_map[c.tx_name_5p]
        cluster3p = transcript_cluster_map[c.tx_name_3p]
        # get genomic positions of breakpoints
        coord5p = transcript_to_genome_pos(c.tx_name_5p, c.tx_end_5p-1, transcript_genome_map)
        coord3p = transcript_to_genome_pos(c.tx_name_3p, c.tx_start_3p, transcript_genome_map)
        # add to dictionary
        cluster_chimera_dict[(cluster5p,cluster3p,coord5p,coord3p)].append(key)    
    # choose highest coverage chimeras within each pair of clusters
    logging.debug("Finding highest coverage isoforms")
    kept_chimeras = set()
    for stats_list in cluster_chimera_dict.itervalues():
        stats_dict = collections.defaultdict(lambda: set())
        for stats_info in stats_list:
            # index chimera names
            stats_dict[stats_info[1:]].add(stats_info[0])
        # find highest scoring key
        sorted_keys = sorted(stats_dict.keys(), reverse=True)
        kept_chimeras.update(stats_dict[sorted_keys[0]])
    return kept_chimeras

예제 #2

파일 보기

파일: write_output.py 프로젝트: bioinfo-dirty-jobs/chimerascan-vrl

def get_chimera_groups(input_file, gene_file):
    # build a lookup table to get gene clusters from transcript name    
    transcript_cluster_map = build_transcript_cluster_map(open(gene_file))
    # build a lookup table to get genome coordinates from transcript 
    # coordinates
    # TODO: can either group by exact breakpoint, or just by
    # gene cluster
    # transcript_genome_map = build_transcript_genome_map(open(gene_file))
    # group chimeras in the same genomic cluster with the same
    # breakpoint
    cluster_chimera_dict = collections.defaultdict(lambda: [])
    for c in Chimera.parse(open(input_file)):
        # get cluster of overlapping genes
        cluster5p = transcript_cluster_map[c.tx_name_5p]
        cluster3p = transcript_cluster_map[c.tx_name_3p]
        # get genomic positions of breakpoints
        #coord5p = transcript_to_genome_pos(c.partner5p.tx_name, c.partner5p.end-1, transcript_genome_map)
        #coord3p = transcript_to_genome_pos(c.partner3p.tx_name, c.partner3p.start, transcript_genome_map)
        # add to dictionary
        cluster_chimera_dict[(cluster5p,cluster3p)].append(c)
        # TODO: use this grouping instead?
        #cluster_chimera_dict[(cluster5p,cluster3p,coord5p,coord3p)].append(c)
    for key,chimeras in cluster_chimera_dict.iteritems():
        yield key,chimeras