Ejemplo n.º 1
0
def parse_locus(locus, fh):
    """Read the transfrags that belong to one indexed locus.

    Args:
        locus: index record exposing ``name``, ``chrom``, ``start``, ``end``,
            ``filepos`` and ``num_lines``.
        fh: seekable file handle that yields one line per iteration step.

    Returns:
        A list of ``locus.num_lines`` objects built by ``Transfrag.from_bed``,
        in the order the lines appear in the file.

    Raises:
        StopIteration: if the file ends before ``num_lines`` lines were read.
    """
    genome_id_str = '%s:%d-%d' % (locus.chrom, locus.start, locus.end)
    # pass the values as arguments so logging only formats the message
    # when DEBUG output is actually enabled
    logging.debug('LocusIndex: %s coords: %s transfrags: %d',
                  locus.name, genome_id_str, locus.num_lines)
    # fast-forward to 'filepos'
    fh.seek(locus.filepos)
    # parse 'num_lines' from file into Transfrag objects; the builtins
    # next() and range() behave the same on Python 2 and Python 3, unlike
    # the Python-2-only fh.next() and xrange()
    return [Transfrag.from_bed(next(fh)) for _ in range(locus.num_lines)]
Ejemplo n.º 2
0
def parse_locus(locus, fh):
    """Read the transfrags that belong to one indexed locus.

    Args:
        locus: index record exposing ``name``, ``chrom``, ``start``, ``end``,
            ``filepos`` and ``num_lines``.
        fh: seekable file handle that yields one line per iteration step.

    Returns:
        A list of ``locus.num_lines`` objects built by ``Transfrag.from_bed``,
        in the order the lines appear in the file.

    Raises:
        StopIteration: if the file ends before ``num_lines`` lines were read.
    """
    genome_id_str = '%s:%d-%d' % (locus.chrom, locus.start, locus.end)
    # pass the values as arguments so logging only formats the message
    # when DEBUG output is actually enabled
    logging.debug('LocusIndex: %s coords: %s transfrags: %d',
                  locus.name, genome_id_str, locus.num_lines)
    # fast-forward to 'filepos'
    fh.seek(locus.filepos)
    # parse 'num_lines' from file into Transfrag objects; the builtins
    # next() and range() behave the same on Python 2 and Python 3, unlike
    # the Python-2-only fh.next() and xrange()
    return [Transfrag.from_bed(next(fh)) for _ in range(locus.num_lines)]
Ejemplo n.º 3
0
def main():
    """Tabulate the splice motifs of the junctions found in a BED file.

    Command-line arguments: ``genome_fasta_file`` and ``bed_file``. Both paths
    must exist, otherwise the argument parser reports an error and exits.

    Every distinct ``(chrom, start, end, strand)`` tuple produced by
    ``Transfrag.iterintrons()`` is collected, skipping records whose
    chromosome is not in the FASTA file. For each junction the two bases
    fetched at ``[start, start + 2)`` and the two at ``[end - 2, end)`` are
    concatenated (reverse-complemented for ``Strand.NEG``) and counted.
    A tab-separated ``motif / count / frac`` table is printed to stdout,
    most common motif first.
    """
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser()
    parser.add_argument('genome_fasta_file')
    parser.add_argument('bed_file')
    args = parser.parse_args()

    # check args
    if not os.path.exists(args.genome_fasta_file):
        parser.error('genome fasta file %s not found' % args.genome_fasta_file)
    if not os.path.exists(args.bed_file):
        parser.error('bed file %s not found' % args.bed_file)
    logging.info('genome fasta file: %s', args.genome_fasta_file)
    logging.info('bed file: %s', args.bed_file)

    splice_juncs = set()
    motif_counter = Counter()
    fasta_fh = FastaFile(args.genome_fasta_file)
    # try/finally guarantees the FASTA handle is released even when parsing
    # or fetching raises part-way through
    try:
        # process bed file to get junctions
        logging.info('Reading Junctions')
        with open(args.bed_file) as bed_fh:
            for line in bed_fh:
                t = Transfrag.from_bed(line)
                if t.chrom not in fasta_fh:
                    continue
                # the set de-duplicates junctions shared by several records
                for start, end in t.iterintrons():
                    splice_juncs.add((t.chrom, start, end, t.strand))
        logging.info('Read %d Junctions', len(splice_juncs))

        logging.info('Profiling Splice Motifs')
        for chrom, start, end, strand in splice_juncs:
            # NOTE(review): assumes fetch() takes 0-based half-open
            # coordinates matching iterintrons() -- confirm
            s = fasta_fh.fetch(chrom, start, start + 2)
            s += fasta_fh.fetch(chrom, end - 2, end)
            if strand == Strand.NEG:
                s = dna_reverse_complement(s)
            motif_counter[s] += 1
    finally:
        fasta_fh.close()

    # report statistics; the single-argument print(...) form produces the
    # same output under the Python 2 print statement and Python 3 print()
    total = sum(motif_counter.values())
    print('\t'.join(['motif', 'count', 'frac']))
    for motif, count in motif_counter.most_common():
        print('\t'.join([motif, str(count), str(float(count) / total)]))
    logging.info('Done')