Beispiel #1
0
 def _consumer(input_queue, output_queue):
     """Worker loop: turn queued BED lines into conservation result rows.

     Lines are taken from ``input_queue`` until a ``None`` sentinel is
     received.  Every line is parsed with ``BEDFeature.from_string`` and one
     tab-separated row (name, position, array length, mean, then one score
     per window size) is put on ``output_queue``.  After the sentinel, a
     single ``None`` is put on ``output_queue``.

     ``chrom_bigwig_dict`` and ``window_sizes`` are not parameters; they are
     looked up in the enclosing scope.
     """
     line = input_queue.get()
     while line is not None:
         feature = BEDFeature.from_string(line)
         # conservation data comes from the bigWig file of the feature's chromosome
         values = extract_bigwig_data(feature, chrom_bigwig_dict[feature.chrom])
         # one best_sliding_window result (using np.mean) per window size
         scores = [best_sliding_window(values, size, np.mean)
                   for size in window_sizes]
         # average over the finite entries only; NaN when there are none
         finite = values[np.isfinite(values)]
         mean_cons = np.mean(finite) if len(finite) != 0 else np.nan
         position = '%s:%d-%d[%s]' % (feature.chrom, feature.tx_start,
                                      feature.tx_end, feature.strand)
         row = [feature.name, position, str(len(values)), str(mean_cons)]
         row += [str(score) for score in scores]
         output_queue.put('\t'.join(row))
         line = input_queue.get()
     # tell the reader of output_queue that this worker is finished
     output_queue.put(None)
 def _consumer(worker_index, input_queue, output_queue):
     """Worker loop: process queued BED lines and accumulate histograms.

     Lines are taken from ``input_queue`` until a ``None`` sentinel is
     received.  Every line is parsed with ``BEDFeature.from_string`` and
     passed to ``bed_feature_conservation`` together with this worker's
     ``hists`` mapping; the returned fields are tab-joined and put on
     ``output_queue``.

     After the sentinel, the histograms are saved to ``w<worker_index>.npz``
     in the current working directory and a single ``None`` is put on
     ``output_queue``.

     ``chrom_bigwig_dict`` is not a parameter; it is looked up in the
     enclosing scope.
     """
     # ``np.float`` was a plain alias of the builtin ``float`` and no longer
     # exists in current NumPy; the builtin yields the same float64 arrays.
     hists = collections.defaultdict(
         lambda: np.zeros(NUM_BINS - 1, dtype=float))
     while True:
         line = input_queue.get()
         if line is None:
             break
         f = BEDFeature.from_string(line)
         fields = bed_feature_conservation(f, chrom_bigwig_dict, hists)
         output_queue.put('\t'.join(fields))
     # each worker writes its own file, distinguished by worker_index
     np.savez('w%d.npz' % (worker_index), **hists)
     # tell the reader of output_queue that this worker is finished
     output_queue.put(None)
Beispiel #3
0
def main():
    """Command-line entry point: measure conservation for a BED file.

    Positional arguments are the directory holding the bigWig files and the
    BED file.  Options: ``-v/--verbose`` (DEBUG logging), ``-p/--num-processes``
    (default 1) and ``--pattern`` (passed to ``find_bigwig_files``).

    Output files are named after the BED file with its extension removed:
    ``<prefix>.results.txt`` for the tab-separated rows and
    ``<prefix>.hists.npz`` for the histograms.  With more than one process
    the work is delegated to ``conservation_parallel``; otherwise the BED
    file is processed in this process.

    Exits through ``parser.error`` when the bigWigToBedGraph binary or the
    BED file cannot be found.  Returns 0 on completion.
    """
    # parse command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        dest="verbose",
                        default=False)
    parser.add_argument('-p',
                        '--num-processes',
                        type=int,
                        dest='num_processes',
                        default=1)
    parser.add_argument("--pattern",
                        dest="pattern",
                        default=r'{{CHROM}}.phyloP46way.bw')
    parser.add_argument("bigwig_file_dir")
    parser.add_argument("bed_file")
    args = parser.parse_args()
    # set logging level
    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    # check command line parameters
    if which(BIGWIG_TO_BEDGRAPH_BIN) is None:
        parser.error('bigWigToBedGraph binary not found in PATH')
    if not os.path.exists(args.bed_file):
        parser.error("BED file %s not found" % (args.bed_file))
    prefix = os.path.splitext(args.bed_file)[0]
    results_file = prefix + '.results.txt'
    hists_file = prefix + '.hists.npz'
    # find bigwig files
    logging.info("Indexing bigWig files")
    chrom_bigwig_dict = find_bigwig_files(args.bigwig_file_dir, args.pattern)
    # process bed file
    logging.info("Measuring conservation")
    if args.num_processes > 1:
        conservation_parallel(args.bed_file, chrom_bigwig_dict,
                              args.num_processes, results_file, hists_file)
    else:
        # ``np.float`` was a plain alias of the builtin ``float`` and no
        # longer exists in current NumPy; the builtin gives the same dtype.
        hists = collections.defaultdict(
            lambda: np.zeros(NUM_BINS - 1, dtype=float))
        with open(results_file, 'w') as outfile:
            # open the BED file in a ``with`` so the handle is closed even
            # if processing a feature raises
            with open(args.bed_file) as bed_fh:
                for f in BEDFeature.parse(bed_fh):
                    fields = bed_feature_conservation(f, chrom_bigwig_dict,
                                                      hists)
                    # explicit write: same bytes as the old
                    # ``print >> outfile`` and valid on Python 2 and 3
                    outfile.write('\t'.join(fields) + '\n')
        np.savez(hists_file, **hists)
    return 0
Beispiel #4
0
 def _consumer(worker_index, input_queue, output_queue):
     """Consume BED lines from a queue and emit one result row per line.

     The loop ends when ``None`` is read from ``input_queue``.  Each line is
     parsed with ``BEDFeature.from_string``; ``bed_feature_conservation`` is
     called with the feature, ``chrom_bigwig_dict`` (taken from the enclosing
     scope) and the worker-local ``hists`` mapping, and its fields are put on
     ``output_queue`` joined by tabs.

     On shutdown the histograms are written to ``w<worker_index>.npz`` in the
     current working directory, then ``None`` is put on ``output_queue``.
     """
     def _empty_hist():
         # builtin ``float`` replaces ``np.float``, an alias of the same
         # type that has been removed from current NumPy releases
         return np.zeros(NUM_BINS - 1, dtype=float)

     hists = collections.defaultdict(_empty_hist)
     line = input_queue.get()
     while line is not None:
         feature = BEDFeature.from_string(line)
         fields = bed_feature_conservation(feature, chrom_bigwig_dict, hists)
         output_queue.put('\t'.join(fields))
         line = input_queue.get()
     # per-worker histogram file, named by worker_index
     np.savez('w%d.npz' % (worker_index), **hists)
     # signal completion to whoever reads output_queue
     output_queue.put(None)
def main():
    """Parse the command line and measure conservation for a BED file.

    Takes a bigWig directory and a BED file as positional arguments, plus
    ``-v/--verbose``, ``-p/--num-processes`` (default 1) and ``--pattern``
    (forwarded to ``find_bigwig_files``).

    Results go to ``<prefix>.results.txt`` and histograms to
    ``<prefix>.hists.npz``, where ``<prefix>`` is the BED path without its
    extension.  More than one process delegates to ``conservation_parallel``;
    otherwise features are processed one by one in this process.

    Exits through ``parser.error`` when the bigWigToBedGraph binary or the
    BED file is missing.  Returns 0 on completion.
    """
    # parse command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true",
                        dest="verbose", default=False)
    parser.add_argument('-p', '--num-processes', type=int,
                        dest='num_processes', default=1)
    parser.add_argument("--pattern", dest="pattern",
                        default=r'{{CHROM}}.phyloP46way.bw')
    parser.add_argument("bigwig_file_dir")
    parser.add_argument("bed_file")
    args = parser.parse_args()
    # set logging level
    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    # check command line parameters
    if which(BIGWIG_TO_BEDGRAPH_BIN) is None:
        parser.error('bigWigToBedGraph binary not found in PATH')
    if not os.path.exists(args.bed_file):
        parser.error("BED file %s not found" % (args.bed_file))
    prefix = os.path.splitext(args.bed_file)[0]
    results_file = prefix + '.results.txt'
    hists_file = prefix + '.hists.npz'
    # find bigwig files
    logging.info("Indexing bigWig files")
    chrom_bigwig_dict = find_bigwig_files(args.bigwig_file_dir, args.pattern)
    # process bed file
    logging.info("Measuring conservation")
    if args.num_processes > 1:
        conservation_parallel(args.bed_file, chrom_bigwig_dict, args.num_processes,
                              results_file, hists_file)
        return 0
    # single-process path
    # builtin ``float`` replaces ``np.float``, an alias of the same type that
    # has been removed from current NumPy releases
    hists = collections.defaultdict(lambda: np.zeros(NUM_BINS - 1, dtype=float))
    with open(results_file, 'w') as outfile:
        # the BED handle is managed by ``with`` so it is always closed
        with open(args.bed_file) as bed_fh:
            for f in BEDFeature.parse(bed_fh):
                fields = bed_feature_conservation(f, chrom_bigwig_dict, hists)
                # same output as ``print >>outfile``, valid on Python 2 and 3
                outfile.write('\t'.join(fields) + '\n')
    np.savez(hists_file, **hists)
    return 0
Beispiel #6
0
def main():
    """Convert a BED file to GTF records printed on standard output.

    Command line: a positional ``bed_file`` and an optional ``--source``
    (default ``'bed_to_gtf'``) used as the GTF source field.

    For every BED record one ``transcript`` feature spanning
    ``tx_start``..``tx_end`` is printed, followed by one ``exon`` feature per
    entry of the record's ``exons``.  ``transcript_id`` and ``gene_id`` are
    both set to the BED name; each exon additionally carries ``exon_number``,
    the zero-based index produced by ``enumerate``.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    parser = argparse.ArgumentParser()
    parser.add_argument("--source", dest="source", default='bed_to_gtf')
    parser.add_argument("bed_file")
    args = parser.parse_args()
    bed_file = args.bed_file
    source = args.source

    def _new_feature(x, feature_type, start, end, attrs):
        # Build one GTFFeature that takes chrom/score/strand from BED record x.
        f = GTFFeature()
        f.seqid = x.chrom
        f.source = source
        f.feature_type = feature_type
        f.start = start
        f.end = end
        f.score = x.score
        f.strand = x.strand
        f.phase = '.'
        f.attrs = attrs
        return f

    # ``with`` closes the BED file even if a record fails to convert
    with open(bed_file) as bed_fh:
        for x in BEDFeature.parse(bed_fh):
            # NOTE(review): coordinates are copied unchanged from the BED
            # record; confirm GTFFeature handles any 0-/1-based conversion.
            transcript = _new_feature(x, 'transcript', x.tx_start, x.tx_end,
                                      {'transcript_id': x.name,
                                       'gene_id': x.name})
            features = [transcript]
            for i, (start, end) in enumerate(x.exons):
                # every exon gets its own copy of the transcript attributes
                exon = _new_feature(x, 'exon', start, end,
                                    dict(transcript.attrs))
                exon.attrs["exon_number"] = i
                features.append(exon)
            for f in features:
                # parenthesised single argument: valid on Python 2 and 3
                print(str(f))
Beispiel #7
0
def main():
    """Print the GTF equivalent of a BED file on standard output.

    Arguments are read from the command line: the BED file path and an
    optional ``--source`` value (default ``'bed_to_gtf'``) written into the
    source field of every feature.

    Each BED record yields a ``transcript`` feature followed by one ``exon``
    feature per item in ``exons``.  The BED name is used for both
    ``transcript_id`` and ``gene_id``; exons also get ``exon_number``, the
    zero-based position from ``enumerate``.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    parser = argparse.ArgumentParser()
    parser.add_argument("--source", dest="source", default='bed_to_gtf')
    parser.add_argument("bed_file")
    args = parser.parse_args()
    bed_file = args.bed_file
    source = args.source
    # the context manager guarantees the BED file handle is released
    with open(bed_file) as bed_fh:
        for x in BEDFeature.parse(bed_fh):
            # transcript-level feature
            # NOTE(review): start/end are copied as-is from the BED record;
            # confirm whether GTFFeature expects 1-based coordinates.
            f = GTFFeature()
            f.seqid = x.chrom
            f.source = source
            f.feature_type = 'transcript'
            f.start = x.tx_start
            f.end = x.tx_end
            f.score = x.score
            f.strand = x.strand
            f.phase = '.'
            f.attrs = {'transcript_id': x.name, 'gene_id': x.name}
            features = [f]
            # exon-level features, in the order given by the BED record
            for i, e in enumerate(x.exons):
                start, end = e
                f = GTFFeature()
                f.seqid = x.chrom
                f.source = source
                f.feature_type = 'exon'
                f.start = start
                f.end = end
                f.score = x.score
                f.strand = x.strand
                f.phase = '.'
                # copy so the exon_number does not leak into the transcript
                f.attrs = dict(features[0].attrs)
                f.attrs["exon_number"] = i
                features.append(f)
            for f in features:
                # single parenthesised argument prints identically on
                # Python 2 and is valid syntax on Python 3
                print(str(f))
Beispiel #8
0
def conservation_serial(bed_file, window_sizes, chrom_bigwig_dict):
    """Print a tab-separated conservation table for every BED feature.

    A header row (``name``, ``position``, ``transcript_length``, ``mean`` and
    one column per window size) is printed first.  Then, for each feature
    parsed from ``bed_file``, one row is printed with the feature name, its
    position string, the length of the extracted array, the mean of its
    finite values (NaN when there are none) and one ``best_sliding_window``
    result per window size.

    Args:
        bed_file: path of the BED file to read.
        window_sizes: iterable of window sizes; each is passed to
            ``best_sliding_window`` and used as a column header.
        chrom_bigwig_dict: mapping indexed by ``f.chrom`` giving the bigWig
            file to read for that chromosome.

    Raises:
        KeyError: if a feature's chromosome is missing from
            ``chrom_bigwig_dict`` (when it is a plain dict).
    """
    # output header fields
    header = ['name', 'position', 'transcript_length', 'mean']
    header.extend(map(str, window_sizes))
    # parenthesised single argument: valid on Python 2 and 3
    print('\t'.join(header))
    # process bed file; ``with`` closes the handle even on error
    with open(bed_file) as bed_fh:
        for f in BEDFeature.parse(bed_fh):
            # retrieve conservation data
            bigwig_file = chrom_bigwig_dict[f.chrom]
            arr = extract_bigwig_data(f, bigwig_file)
            # measure conservation at various sliding windows
            window_scores = [best_sliding_window(arr, window_size, np.mean)
                             for window_size in window_sizes]
            # calc mean conservation over finite values only
            finitearr = arr[np.isfinite(arr)]
            if len(finitearr) == 0:
                mean_cons = np.nan
            else:
                mean_cons = np.mean(finitearr)
            fields = [f.name,
                      '%s:%d-%d[%s]' % (f.chrom, f.tx_start, f.tx_end, f.strand),
                      str(len(arr)), str(mean_cons)]
            fields.extend(map(str, window_scores))
            print('\t'.join(fields))