# Example #1
0
def view_track(parser, options):
    """Print the contents of one track over a region to standard output.

    Opens the track factory at ``options.file`` read-only, looks up the
    track called ``options.name`` and writes the data for ``options.region``
    to stdout.  How the data is rendered depends on the type name the track
    reports via ``get_type()``:

    * ``SequenceTrack``: the region slice is printed.
    * ``ArrayTrack``: bedgraph output when ``options.file_type`` is
      ``"bedgraph"``, otherwise the region slice is printed.
    * ``VectorTrack``: normalized bedgraph output (filtered by
      ``options.readnum`` and ``options.allele``) when ``options.file_type``
      is ``"bedgraph"``, otherwise the region slice is printed.
    * ``RnaseqTrack``: the coverage track's density over the region is
      printed, followed by the junction track's region slice.

    Any other track type produces no output.

    Args:
        parser: option parser; its ``error()`` method is used to report a
            missing track.
        options: parsed options providing ``file``, ``name`` and ``region``,
            plus ``file_type``, ``readnum`` and ``allele`` for the track
            types that read them.
    """
    tf = TrackFactory(options.file, "r")
    # try/finally guarantees the factory is closed on every exit path:
    # normal completion, the exit triggered by parser.error(), and any
    # exception raised while parsing the region or writing output.
    try:
        if not tf.has_track(options.name):
            # parser.error() is expected not to return (argparse/optparse
            # parsers print the message and exit).
            parser.error("trackfactory '%s' does not contain track '%s'" %
                         (options.file, options.name))
        region = parse_interval(options.region)
        t = tf.get_track(options.name)
        track_type = t.get_type()
        logging.debug("opened track '%s' type '%s'", options.name, track_type)
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and as the Python 3 print function.
        if track_type == SequenceTrack.__name__:
            print(t[region])
        elif track_type == ArrayTrack.__name__:
            if options.file_type == "bedgraph":
                t.tobedgraph(region, sys.stdout)
            else:
                print(t[region])
        elif track_type == VectorTrack.__name__:
            if options.file_type == "bedgraph":
                t.tobedgraph(region, sys.stdout, norm=True,
                             read=options.readnum, allele=options.allele)
            else:
                print(t[region])
        elif track_type == RnaseqTrack.__name__:
            cov_track = t.get_coverage_track()
            print(cov_track.density(region))
            junc_track = t.get_junction_track()
            print(junc_track[region])
        logging.debug("done")
    finally:
        tf.close()
# Example #2
0
def main():
    """Write a tab-separated table of per-gene exon counts for each track.

    Command line:
        ``[--stranded] [--ambiguous] [--aliases FILE] bed TRACK [TRACK ...]``

    Each ``TRACK`` argument has the form ``NAME@FILE:H5PATH``: ``NAME``
    labels the output column(s), ``FILE`` is opened with ``TrackFactory``
    and ``H5PATH`` selects the track inside it.

    The optional alias file is tab-separated.  A line starting with ``#``
    supplies extra header names; every other line maps its first field
    (looked up by gene name) to the remaining fields.

    Genes come from the ``bed`` file: all of them with ``--ambiguous``,
    otherwise whatever ``filter_strand_conflicts`` returns.  For every gene
    the counts over its exons are summed per track, giving one column per
    track, or a sense/antisense pair with ``--stranded``.  Genes whose
    chromosome is unknown to every track are omitted.  A track that lacks
    the gene's chromosome contributes zero counts so that the columns stay
    aligned with the header.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(levelname)s - %(message)s")
    parser = argparse.ArgumentParser()
    parser.add_argument("--stranded", dest="stranded", action="store_true", default=False)
    parser.add_argument("--ambiguous", dest="ambiguous", action="store_true", default=False)
    parser.add_argument("--aliases", dest="alias_file", default=None)
    parser.add_argument("bed")
    parser.add_argument("track_files", nargs="+")
    options = parser.parse_args()

    alias_dict = {}
    alias_header = []
    if options.alias_file is not None:
        # Context manager so the alias file is closed once it has been read.
        with open(options.alias_file) as alias_fh:
            for line in alias_fh:
                if line.startswith("#"):
                    alias_header = line.strip()[1:].split('\t')
                    continue
                fields = line.strip().split('\t')
                alias_dict[fields[0]] = fields[1:]

    # NOTE(review): the header lists the alias columns before "gene_name",
    # while each row below starts with the gene name followed by the alias
    # fields.  Left unchanged; confirm the intended column order against the
    # alias file format.
    header_fields = alias_header + ["gene_name", "gene_interval", "gene_length"]
    # Every opened factory is recorded immediately so the finally clause can
    # close it even if a later step fails.
    factories = []
    tracks = []
    try:
        for track_path in options.track_files:
            name, path = track_path.split("@")
            file_path, h5_path = path.split(":")
            tf = TrackFactory(file_path, "r")
            factories.append(tf)
            t = tf.get_track(h5_path)
            tracks.append((name, tf, t, set(t.get_rnames())))
            if options.stranded:
                header_fields.append("%s_sense" % name)
                header_fields.append("%s_antisense" % name)
            else:
                header_fields.append(name)
        # output header
        print('\t'.join(map(str, header_fields)))

        # read genes
        if options.ambiguous:
            # The list is built inside the with-block, so the BED file can
            # be closed before counting starts.
            with open(options.bed) as bed_fh:
                genes = list(BedGene.parse(bed_fh))
        else:
            genes = filter_strand_conflicts(options.bed)

        # get counts
        for g in genes:
            alias_fields = alias_dict.get(g.name, ["None"] * len(alias_header))
            fields = ([g.name] + alias_fields +
                      ["%s[%s]:%d-%d" % (g.chrom, g.strand, g.tx_start, g.tx_end),
                       sum((end - start) for start, end in g.exons)])
            # NOTE(review): a "+" gene is counted on NEG_STRAND as its sense
            # strand.  Presumably this matches a library protocol whose reads
            # are reverse-complemented -- confirm.  The antisense computation
            # assumes the two strand constants are 0 and 1.
            sense_strand = NEG_STRAND if g.strand == "+" else POS_STRAND
            antisense_strand = int(not sense_strand)
            rname_found = False
            for name, tf, t, rnames in tracks:
                if g.chrom not in rnames:
                    # Pad with zero counts instead of skipping the track;
                    # otherwise the remaining columns shift left and no
                    # longer line up with the header.
                    fields.extend([0, 0] if options.stranded else [0])
                    continue
                rname_found = True
                if options.stranded:
                    sense_count = 0
                    antisense_count = 0
                    for start, end in g.exons:
                        sense_count += t.count((g.chrom, start, end, sense_strand))
                        antisense_count += t.count((g.chrom, start, end, antisense_strand))
                    fields.append(sense_count)
                    fields.append(antisense_count)
                else:
                    count = 0
                    for start, end in g.exons:
                        count += t.count((g.chrom, start, end))
                    fields.append(count)
            # Genes on a chromosome that no track knows about are not reported.
            if rname_found:
                print('\t'.join(map(str, fields)))
    finally:
        for tf in factories:
            tf.close()