Ejemplo n.º 1
0
def main():
    """Write the peaks of an input file to a bigWig track with score 1.

    Reads the first three columns of the tab-separated peak file
    (``args.input``) and the tab-separated sizes file (``args.sizes``),
    keeps the peaks whose first column value occurs in the first column of
    the sizes file, gives every retained peak a score of 1, and writes
    them to ``<prefix>.bedGraph``. That file is then passed to
    ``bedgraph_to_bigwig`` with ``deletebg=True``.

    ``<prefix>`` is ``args.prefix`` when given, otherwise ``args.input``.
    """
    args = parse_args()

    # Both inputs are header-less, tab-separated tables.
    _logger.info('Reading input files')
    table_opts = {'sep': '\t', 'header': None}
    peaks = pd.read_csv(args.input, usecols=[0, 1, 2], skiprows=args.skip,
                        **table_opts)
    sizes = pd.read_csv(args.sizes, **table_opts)

    # Keep only peaks on chromosomes listed in the sizes file.
    # TODO: Move this test into the df_to_bedGraph function
    in_sizes = peaks[0].isin(sizes[0])
    peaks_filtered = peaks[in_sizes].copy()
    n_kept = str(len(peaks_filtered))
    n_total = str(len(peaks))
    _logger.info('Retaining ' + n_kept + ' of ' + n_total +
                 ' peaks in given chromosomes.')

    # Every retained peak gets a constant score in a fourth column.
    _logger.info('Adding score')
    peaks_filtered[3] = 1

    # Without an explicit prefix, output files are named after the input.
    prefix = args.input if args.prefix is None else args.prefix
    bedgraph_path = prefix + '.bedGraph'

    _logger.info('Writing peaks to bedGraph file')
    df_to_bedGraph(peaks_filtered, bedgraph_path)

    # Convert to bigWig; deletebg=True asks for the bedGraph to be removed.
    _logger.info('Writing peaks to bigWig file {}'.format(prefix + '.bw'))
    bedgraph_to_bigwig(bedgraph_path, args.sizes, deletebg=True)
    _logger.info('Done!')
Ejemplo n.º 2
0
def main():
    """Convert peak files to bigwig.

    Reads the intervals in ``args.input`` and the sizes in ``args.sizes``,
    assigns every peak a score of 1, writes the peaks to
    ``<out_dir>/<prefix>.bedGraph`` and passes that file to
    ``bedgraph_to_bigwig`` with ``deletebg=True`` and ``sort=True``.

    ``<prefix>`` is ``args.prefix`` when given, otherwise the base name of
    ``args.input``.
    """
    args = parse_args()

    # Explicit prefix wins; otherwise fall back to the input's base name.
    prefix = args.prefix
    if prefix is None:
        prefix = os.path.basename(args.input)
    bedgraph_file = prefix + '.bedGraph'
    out_bg_name = os.path.join(args.out_dir, bedgraph_file)

    # Load the peaks and the sizes.
    _logger.info('Reading input file')
    peaks = read_intervals(args.input, skip=args.skip)
    n_peaks = str(len(peaks))
    _logger.info('Read ' + n_peaks + ' peaks.')
    sizes = read_sizes(args.sizes)

    # Every peak gets the same constant score.
    _logger.info('Adding score')
    peaks['score'] = 1

    # Note: peaks will be subset to chromosomes in sizes file.
    _logger.info('Writing peaks to bedGraph file')
    df_to_bedGraph(peaks, out_bg_name, sizes)

    # Convert to bigWig; deletebg=True asks for the bedGraph to be removed.
    bigwig_name = prefix + '.bw'
    _logger.info('Writing peaks to bigWig file {}'.format(bigwig_name))
    bedgraph_to_bigwig(out_bg_name, args.sizes, deletebg=True, sort=True)

    _logger.info('Done!')
Ejemplo n.º 3
0
def save_to_bedgraph(batch_range,
                     item,
                     channel,
                     intervals,
                     outfile,
                     rounding=None,
                     threshold=None):
    """Write the scores of one batch slice to a bedGraph file.

    Nothing is written when the selected scores contain no value above 0.

    Args:
        batch_range : Sequence whose first two entries are the start and
            end position of the batch slice to write.
        item : Output from the queue, a ``(keys, batch)`` pair. ``keys``
            must provide ``.numpy()``; ``batch`` is indexed as
            ``batch[start:end, :, channel]``.
        channel : Channel to be written out.
        intervals : pandas object containing inference intervals; rows are
            selected by position using the values in ``keys``.
        outfile : The output file to write the output to.
        rounding : If not None, number of decimals to round the scores to.
        threshold : If not None and ``channel`` is 1, scores are replaced
            by 1 where they exceed this value and by 0 elsewhere.

    """
    keys, batch = item
    start, end = batch_range[0], batch_range[1]
    scores = batch[start:end, :, channel]

    # Round scores - for regression output.
    if rounding is not None:
        # Cast to float64 first: np.around sometimes doesn't work with
        # float32. To investigate.
        scores = np.around(scores.astype('float64'), decimals=rounding)

    # Thresholding is applied only to the peak channel (channel 1).
    if channel == 1 and threshold is not None:
        scores = (scores > threshold).astype(int)

    # Skip the batch entirely when it holds no positive value.
    if not (scores > 0).any():
        return

    # Pick the intervals that belong to this slice of the batch.
    row_positions = keys.numpy()[start:end]
    batch_intervals = intervals.iloc[row_positions, :].copy()

    # Attach one 1-D score vector (one row of `scores`) to each interval.
    batch_intervals['scores'] = list(scores)

    # Keep only intervals whose scores sum to more than 0.
    has_signal = scores.sum(axis=1) > 0
    batch_intervals = batch_intervals.loc[has_signal, :]

    # Expand each interval, combine with scores, and contract to smaller
    # intervals before writing.
    df_to_bedGraph(intervals_to_bg(batch_intervals), outfile)