Beispiel #1
0
def test_Feature():
    """Check how ``Feature.from_string`` exposes the ID and Parent attributes."""
    # The first eight tab-separated columns are the same in every case; only
    # the final (attributes) column varies.
    fixed_columns = ['Chr1', 'TAIR10', 'gene', '2', '20', '.', '+', '.']

    def parse(attributes):
        return Feature.from_string('\t'.join(fixed_columns + [attributes]))

    # One parent listed.
    single = parse('ID=Gene1;Parent=p')
    eq_('Gene1', single.ID)
    eq_(['p'], single.parents)

    # A comma-separated Parent value gives one entry per parent.
    multiple = parse('ID=Gene1;Parent=p,q')
    eq_(['p', 'q'], multiple.parents)

    # Empty attributes column: empty ID and no parents.
    bare = parse('')
    eq_('', bare.ID)
    eq_([], bare.parents)
Beispiel #2
0
def test_transcript_splice_junctions():
    """Compare ``transcript_splice_junctions`` on ``g.gff`` to known values."""
    expected = {
        'Chr1_40.1': [220, 302],
        'Chr1_364.1': [83],
        'Chr1_366.1': [90, 196, 326, 535],
    }

    # Keep the fixture object bound for the whole test instead of inlining
    # ``dummy('g.gff').name`` -- presumably it owns a temporary file.
    fixture = dummy('g.gff')
    observed = transcript_splice_junctions(Feature.from_file(fixture.name))

    eq_(expected, observed)
Beispiel #3
0
def test_Feature():
    """Exercise ``Feature.from_string`` with varying attribute columns."""
    # Everything up to the attributes column is shared by all three records.
    leading = '\t'.join(['Chr1', 'TAIR10', 'gene', '2', '20', '.', '+', '.'])

    def feature_with(attributes):
        return Feature.from_string(leading + '\t' + attributes)

    # ID plus a single Parent.
    one_parent = feature_with('ID=Gene1;Parent=p')
    eq_('Gene1', one_parent.ID)
    eq_(['p'], one_parent.parents)

    # Parent holding a comma-separated pair.
    two_parents = feature_with('ID=Gene1;Parent=p,q')
    eq_(['p', 'q'], two_parents.parents)

    # No attributes at all.
    no_attributes = feature_with('')
    eq_('', no_attributes.ID)
    eq_([], no_attributes.parents)
Beispiel #4
0
def test_transcript_splice_junctions():
    """Verify the mapping ``transcript_splice_junctions`` builds for ``g.gff``."""
    # The fixture stays referenced until the end of the test; presumably it
    # wraps a temporary file that must outlive the parsing below.
    gff_fixture = dummy('g.gff')
    parsed = Feature.from_file(gff_fixture.name)

    expected_junctions = {
        'Chr1_40.1': [220, 302],
        'Chr1_364.1': [83],
        'Chr1_366.1': [90, 196, 326, 535],
    }
    eq_(expected_junctions, transcript_splice_junctions(parsed))
Beispiel #5
0
    return (end - start + 1) / b.length


def overlap(db, feature):
    """Record on ``feature`` the IDs of the entries of ``db`` it overlaps.

    Nothing is recorded unless the number of overlapping entries lies within
    ``args.min_overlap_count`` .. ``args.max_overlap_count`` (inclusive).
    Each entry is then kept only if ``calc_overlap(entry, feature)`` lies
    within ``args.min_overlap`` .. ``args.max_overlap`` (inclusive).  When at
    least one entry is kept, the IDs are stored comma-joined under
    ``feature.attributes['overlaps']``.

    Relies on the module-level ``args`` namespace.  Returns ``None``.
    """
    candidates = db.overlaps(feature)

    if not (args.min_overlap_count <= len(candidates)
            <= args.max_overlap_count):
        return

    accepted = [
        candidate.ID
        for candidate in candidates
        if args.min_overlap
        <= calc_overlap(candidate, feature)
        <= args.max_overlap
    ]

    if accepted:
        feature.attributes['overlaps'] = ','.join(accepted)


if __name__ == '__main__':
    # ``args`` must remain a module-level name: overlap() reads it as a global.
    args = parser.parse_args()

    db = PositionDatabase(Feature.from_file(args.reference))
    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # Tag each transcript with the reference features it overlaps.
    for transcript in transcripts.values():
        overlap(db, transcript)

    # A parenthesised single-argument print behaves exactly like the
    # Python 2 print statement and is also valid Python 3 syntax.
    print('\n'.join(str(f) for f in flatten_tree(chromosomes)))
 def mapper_init(self):
     """Populate ``self.junctions`` from the reference file.

     Features are read from the path in ``self.options.reference`` and
     handed to ``transcript_splice_junctions``; its result is stored on
     the instance.
     """
     reference_features = Feature.from_file(self.options.reference)
     self.junctions = transcript_splice_junctions(reference_features)
Beispiel #7
0
    return (end - start + 1) / b.length


def overlap(db, feature):
    """Annotate ``feature`` with the IDs of overlapping entries from ``db``.

    The annotation is written only when both filters pass:

    * the number of entries returned by ``db.overlaps(feature)`` is between
      ``args.min_overlap_count`` and ``args.max_overlap_count`` inclusive;
    * at least one entry has a ``calc_overlap(entry, feature)`` value
      between ``args.min_overlap`` and ``args.max_overlap`` inclusive.

    The surviving IDs are comma-joined into
    ``feature.attributes['overlaps']``.  ``args`` is the module-level
    namespace of parsed options.  Returns ``None``.
    """
    hits = db.overlaps(feature)
    hit_count = len(hits)

    too_few = hit_count < args.min_overlap_count
    too_many = hit_count > args.max_overlap_count
    if too_few or too_many:
        return

    kept_ids = []
    for hit in hits:
        amount = calc_overlap(hit, feature)
        if args.min_overlap <= amount <= args.max_overlap:
            kept_ids.append(hit.ID)

    if kept_ids:
        feature.attributes['overlaps'] = ','.join(kept_ids)


if __name__ == '__main__':
    # Keep ``args`` at module level; overlap() looks it up as a global.
    args = parser.parse_args()

    db = PositionDatabase(Feature.from_file(args.reference))
    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # Let overlap() annotate every transcript against the reference database.
    for transcript in transcripts.values():
        overlap(db, transcript)

    # print(...) with one argument is equivalent to the Python 2 print
    # statement and, unlike it, is not a SyntaxError on Python 3.
    flat = flatten_tree(chromosomes)
    print('\n'.join(str(f) for f in flat))
Beispiel #8
0
 def mapper_init(self):
     """Compute and store the splice junctions of the reference features.

     Reads the file named by ``self.options.reference`` with
     ``Feature.from_file`` and saves what ``transcript_splice_junctions``
     returns for those features as ``self.junctions``.
     """
     self.junctions = transcript_splice_junctions(
         Feature.from_file(self.options.reference))
Beispiel #9
0
            hits = sum(counts[feature.ID].values())
        except KeyError:
            hits = 0

        # coverage is RPKM, reads per kilobase of reference per million mapped reads
        # http://www.clcbio.com/manual/genomics/Definition_RPKM.html
        try:
            return (math.pow(10, 9) * hits) / (self.total * feature.length)
        except ZeroDivisionError:
            return 0


if __name__ == '__main__':
    args = parser.parse_args()

    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # TODO would be nice to split filters out into predicate functions
    for transcript in transcripts.values():

        exons = len([x for x in transcript.children if x.type == 'exon'])

        if args.counts:
            counts = Counts.from_file(args.counts)
            coverage = counts.coverage(transcript)
        else:
            coverage = 0

        if transcript.length < args.min_length or transcript.length > args.max_length \
        or exons < args.min_exons or exons > args.max_exons \
        or coverage < args.min_coverage or coverage > args.max_coverage:
Beispiel #10
0
            hits = sum(counts[feature.ID].values())
        except KeyError:
            hits = 0
        
        # coverage is RPKM, reads per kilobase of reference per million mapped reads
        # http://www.clcbio.com/manual/genomics/Definition_RPKM.html
        try:
            return (math.pow(10, 9) * hits) / (self.total * feature.length)
        except ZeroDivisionError:
            return 0
                    
    
if __name__ == '__main__':
    args = parser.parse_args()

    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # TODO would be nice to split filters out into predicate functions
    for transcript in transcripts.values():

        exons = len([x for x in transcript.children if x.type == 'exon'])

        if args.counts:
            counts = Counts.from_file(args.counts)
            coverage = counts.coverage(transcript)
        else:
            coverage = 0

        if transcript.length < args.min_length or transcript.length > args.max_length \
        or exons < args.min_exons or exons > args.max_exons \
        or coverage < args.min_coverage or coverage > args.max_coverage: