예제 #1
0
 def _consumer(input_queue, output_queue):
     while True:
         line = input_queue.get()
         if line is None:
             break
         f = BEDFeature.from_string(line)
         # retrieve conservation data
         bigwig_file = chrom_bigwig_dict[f.chrom]
         arr = extract_bigwig_data(f, bigwig_file)
         # measure conservation at various sliding windows
         window_scores = []
         for window_size in window_sizes:
             window_scores.append(best_sliding_window(arr, window_size, np.mean))
         # measure average conservation
         finitearr = arr[np.isfinite(arr)]
         if len(finitearr) == 0:
             mean_cons = np.nan
         else:
             mean_cons = np.mean(finitearr)
         fields = [f.name, '%s:%d-%d[%s]' % (f.chrom, f.tx_start, f.tx_end, f.strand),
                   str(len(arr)), str(mean_cons)]
         fields.extend(map(str,window_scores))
         result = '\t'.join(fields)
         output_queue.put(result)
     output_queue.put(None)
def bed_feature_conservation(f, chrom_bigwig_dict, hists):
    """Summarise the conservation values of one BED feature.

    The values are read with ``extract_bigwig_data`` from the bigwig file
    registered for ``f.chrom``; non-finite entries are dropped.  When at
    least one finite value exists, a histogram of the values (clipped to
    ``[BIN_MIN, BIN_MAX]`` and binned by ``BINS``) is added in place to
    ``hists[f.name]``.

    Returns a list: the ``'|'``-separated parts of ``f.name`` followed by
    chrom, start, end, strand, the number of finite values, and their sum
    as a string (``'NA'`` when there are no finite values).
    """
    values = extract_bigwig_data(f, chrom_bigwig_dict[f.chrom])
    # keep only the finite entries; missing values are ignored
    finite_values = values[np.isfinite(values)]
    num_finite = len(finite_values)
    if num_finite > 0:
        clipped = np.clip(finite_values, BIN_MIN, BIN_MAX)
        counts, _ = np.histogram(clipped, BINS)
        hists[f.name] += counts
        cons_str = str(np.sum(finite_values))
    else:
        cons_str = 'NA'
    location = [f.chrom, str(f.tx_start), str(f.tx_end), f.strand]
    return f.name.split('|') + location + [str(num_finite), cons_str]
예제 #3
0
def bed_feature_conservation(f, chrom_bigwig_dict, hists):
    """Build the output row describing the conservation of a BED feature.

    Conservation values come from ``extract_bigwig_data`` using the bigwig
    file mapped to ``f.chrom``.  Only finite values are considered.  If any
    exist, their histogram (after clipping to ``BIN_MIN``/``BIN_MAX``, with
    bins ``BINS``) is accumulated into ``hists[f.name]``.

    The returned list holds the pieces of ``f.name`` split on ``'|'``, then
    chrom, tx_start, tx_end, strand, the count of finite values and the sum
    of those values as text, or ``'NA'`` if no value is finite.
    """
    scores = extract_bigwig_data(f, chrom_bigwig_dict[f.chrom])
    # discard missing (non-finite) values
    finite_scores = scores[np.isfinite(scores)]
    n_finite = len(finite_scores)
    cons_str = 'NA'
    if n_finite != 0:
        binned = np.histogram(np.clip(finite_scores, BIN_MIN, BIN_MAX), BINS)
        hists[f.name] += binned[0]
        cons_str = str(np.sum(finite_scores))
    row = f.name.split('|')
    row += [f.chrom, str(f.tx_start), str(f.tx_end), f.strand]
    row += [str(n_finite), cons_str]
    return row
예제 #4
0
def conservation_serial(bed_file, window_sizes, chrom_bigwig_dict):
    """Print a tab-delimited conservation report for each feature in a BED file.

    A header row is printed first, then one row per feature parsed by
    ``BEDFeature.parse``.  Each row contains the feature name, its position
    as ``chrom:start-end[strand]``, the number of extracted values, the mean
    of the finite values (``nan`` when none are finite), and the result of
    ``best_sliding_window`` with ``np.mean`` for every entry of
    ``window_sizes``.

    Parameters:
        bed_file: path of the BED file to read.
        window_sizes: iterable of sliding-window sizes; also used, converted
            with ``str``, as the trailing header columns.
        chrom_bigwig_dict: mapping from chromosome name to the bigwig file
            passed to ``extract_bigwig_data``.

    Raises:
        KeyError: if a feature's chromosome is missing from
            ``chrom_bigwig_dict``.
    """
    # output header fields
    header = ['name', 'position', 'transcript_length', 'mean']
    header.extend(map(str, window_sizes))
    # single-argument parenthesised print behaves the same on Python 2 and 3
    print('\t'.join(header))
    # process bed file; the context manager closes it even if a feature fails
    with open(bed_file) as bed_fh:
        for f in BEDFeature.parse(bed_fh):
            # retrieve conservation data
            bigwig_file = chrom_bigwig_dict[f.chrom]
            arr = extract_bigwig_data(f, bigwig_file)
            # measure conservation at various sliding windows
            window_scores = [best_sliding_window(arr, window_size, np.mean)
                             for window_size in window_sizes]
            # calc mean conservation over finite values only
            finitearr = arr[np.isfinite(arr)]
            if len(finitearr) == 0:
                mean_cons = np.nan
            else:
                mean_cons = np.mean(finitearr)
            fields = [f.name,
                      '%s:%d-%d[%s]' % (f.chrom, f.tx_start, f.tx_end, f.strand),
                      str(len(arr)), str(mean_cons)]
            fields.extend(map(str, window_scores))
            print('\t'.join(fields))