Ejemplo n.º 1
0
    return (end - start + 1) / b.length


def overlap(db, feature):
    """Record on ``feature`` the IDs of the ``db`` entries that overlap it.

    The entries returned by ``db.overlaps(feature)`` are only considered
    when their count lies within ``args.min_overlap_count`` and
    ``args.max_overlap_count`` (both inclusive).  Of those, an entry is kept
    when ``calc_overlap(entry, feature)`` lies within ``args.min_overlap``
    and ``args.max_overlap`` (both inclusive).  If at least one entry is
    kept, their ``ID`` values are comma-joined and stored under
    ``feature.attributes['overlaps']``; otherwise ``feature`` is untouched.

    NOTE: the thresholds are read from the module-level ``args``, which is
    only assigned in the ``__main__`` section of this script.

    Returns None; the result is written onto ``feature``.
    """
    hits = db.overlaps(feature)
    hit_count = len(hits)

    # Guard clause: too few or too many overlapping entries means the
    # feature is not annotated at all.
    if not (args.min_overlap_count <= hit_count <= args.max_overlap_count):
        return

    accepted = [
        hit.ID for hit in hits
        if args.min_overlap <= calc_overlap(hit, feature) <= args.max_overlap
    ]

    # Only set the attribute when there is something to report.
    if accepted:
        feature.attributes['overlaps'] = ','.join(accepted)


if __name__ == '__main__':
    # ``args`` is deliberately bound at module level: overlap() reads its
    # thresholds from this global rather than taking them as parameters.
    args = parser.parse_args()

    # ``db`` answers the overlaps() queries issued by overlap(); it is built
    # from the features of the reference file.
    db = PositionDatabase(Feature.from_file(args.reference))
    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # Annotate every transcript in place with the reference features it
    # overlaps.
    for t in transcripts.values():
        overlap(db, t)

    # Emit the flattened tree, one feature per line.  A parenthesised
    # single-argument print behaves the same on Python 2 and Python 3,
    # whereas the former print statement was a SyntaxError on Python 3.
    print('\n'.join(str(f) for f in flatten_tree(chromosomes)))
Ejemplo n.º 2
0
    return (end - start + 1) / b.length


def overlap(db, feature):
    """Annotate ``feature`` with the IDs of overlapping entries from ``db``.

    ``db.overlaps(feature)`` supplies the candidate entries.  Nothing is
    done unless the number of candidates is between
    ``args.min_overlap_count`` and ``args.max_overlap_count`` inclusive.
    A candidate qualifies when ``calc_overlap(candidate, feature)`` is
    between ``args.min_overlap`` and ``args.max_overlap`` inclusive.  When
    any candidate qualifies, the qualifying ``ID`` values are joined with
    commas and stored in ``feature.attributes['overlaps']``.

    NOTE: ``args`` is a module-level name that is only assigned in the
    ``__main__`` section of this script.

    Returns None; ``feature`` is modified in place.
    """
    found = db.overlaps(feature)

    count_ok = args.min_overlap_count <= len(found) <= args.max_overlap_count
    if count_ok:
        ids = []
        for candidate in found:
            amount = calc_overlap(candidate, feature)
            if args.min_overlap <= amount <= args.max_overlap:
                ids.append(candidate.ID)

        # Leave the attribute unset when no candidate qualified.
        if ids:
            feature.attributes['overlaps'] = ','.join(ids)


if __name__ == '__main__':
    # overlap() looks up its thresholds on the module-level ``args``, so the
    # parsed options must be bound to exactly this global name.
    args = parser.parse_args()

    # Reference features go into the database that overlap() queries through
    # db.overlaps(); the features from args.gff are arranged into a tree.
    db = PositionDatabase(Feature.from_file(args.reference))
    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # overlap() mutates each transcript's attributes in place.
    for t in transcripts.values():
        overlap(db, t)

    # Print the flattened tree, one feature per line.  The Python 2 print
    # statement used here before is a SyntaxError on Python 3; the
    # single-argument call form works identically on both.
    print('\n'.join(str(f) for f in flatten_tree(chromosomes)))
Ejemplo n.º 3
0
        ID = x.findtext('.//Iteration_query-def')
        query_len = x.findtext('.//Iteration_query-len')

        hits = x.findall('.//Hit')
        for hit in hits:
            hit_def = hit.findtext('.//Hit_def')
            hit_len = hit.findtext('.//Hit_len')
            identity = hit.findtext('.//Hsp_identity')
            align_len = hit.findtext('.//Hsp_align-len')
            if identity != align_len:
                midline = hit.findtext('.//Hsp_midline')
                gap_match = rx.search(midline)
                if gap_match:
                    continue

            p = (float(identity) / float(hit_len)) * (float(identity) / float(query_len))
            if p >= args.min_match:
                try:
                    t = transcripts[ID]
                except KeyError:
                    pass
                else:
                    try:
                        t.attributes['blasts_to'] = ','.join([t.attributes['blasts_to'],
                                                              hit_def])
                    except KeyError:
                        t.attributes['blasts_to'] = hit_def

    for f in flatten_tree(chromosomes):
        print f