Beispiel #1
0
def compare(fileUrl, chrom, start, end):
    """Time a bbi range fetch against the bigWigToBedGraph command-line tool.

    Both approaches are run once over the same ``chrom:start-end`` range of
    ``fileUrl``; the fetched data itself is discarded, only wall-clock
    timestamps are kept.

    Parameters
    ----------
    fileUrl : path or URL handed to both ``bbi.open`` and the external tool
    chrom : chromosome name
    start, end : range bounds; ``end - start`` is reported as ``length``

    Returns
    -------
    pandas.Series
        The query coordinates plus start/end timestamps and elapsed time for
        the bbi call (``bbi*``) and the subprocess call (``os*``).
    """
    # --- in-process read through bbi ---
    bbiStartTime = time.time()
    with bbi.open(fileUrl) as handle:
        handle.fetch(chrom, start, end)
    bbiEndTime = time.time()

    # --- external tool; the clock starts before the command is assembled ---
    osStartTime = time.time()
    command = [
        'bin/bigWigToBedGraph',
        fileUrl,
        'out/out.bedGraph',
        '-chrom=%s' % chrom,
        '-start=%s' % start,
        '-end=%s' % end,
    ]
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    # The decoded text is not used; decoding stays inside the timed region.
    completed.stdout.decode('utf-8')
    osEndTime = time.time()

    timings = {
        'chrom': chrom,
        'start': start,
        'end': end,
        'length': end - start,
        'bbiStartTime': bbiStartTime,
        'bbiEndTime': bbiEndTime,
        'bbiTimeDiff': bbiEndTime - bbiStartTime,
        'osStartTime': osStartTime,
        'osEndTime': osEndTime,
        'osTimeDiff': osEndTime - osStartTime,
    }
    return pd.Series(timings)
Beispiel #2
0
def read_bigbed(path, chrom, start=None, end=None, engine="auto"):
    """
    Read intervals from a bigBed file.

    Parameters
    ----------
    path : str
        Path or URL to a bigBed file
    chrom : str
    start, end : int, optional
        Start and end coordinates. Defaults to 0 and chromosome length.
    engine : {"auto", "pybbi", "pybigwig"}
        Library to use for querying the bigBed file. Matched
        case-insensitively; "bbi" is accepted as an alias for "pybbi".
        With "auto", pybbi is preferred when both libraries are available.

    Returns
    -------
    DataFrame

    Raises
    ------
    ImportError
        If ``engine`` is "auto" and neither pybbi nor pyBigWig is available.
    ValueError
        If ``engine`` is not one of the recognised names.

    """
    engine = engine.lower()

    if engine == "auto":
        if bbi is None and pyBigWig is None:
            raise ImportError(
                "read_bigbed requires either the pybbi or pyBigWig package"
            )
        engine = "pybbi" if bbi is not None else "pybigwig"

    if engine in ("pybbi", "bbi"):
        if start is None:
            start = 0
        if end is None:
            # -1 is handed to pybbi as the "no end given" value
            # (presumably interpreted as the chromosome end; see pybbi docs).
            end = -1
        with bbi.open(path) as f:
            df = f.fetch_intervals(chrom, start=start, end=end)

    elif engine == "pybigwig":
        f = pyBigWig.open(path)
        # Close the handle even if the lookup or query raises; previously the
        # file was left open on every call.
        try:
            if start is None:
                start = 0
            if end is None:
                end = f.chroms()[chrom]
            ivals = f.entries(chrom, start, end)
        finally:
            f.close()
        df = pd.DataFrame(ivals, columns=["start", "end", "rest"])
        df.insert(0, "chrom", chrom)

    else:
        raise ValueError(
            "engine must be 'auto', 'pybbi' or 'pybigwig'; got {}".format(engine)
        )

    return df
Beispiel #3
0
def getCoverageFile(task, dataPath):
    """Fetch the coverage intervals for a task's problem region and save them.

    Reads ``task['problem']`` (with keys ``chrom``, ``chromStart`` and
    ``chromEnd``) and ``task['trackUrl']``, opens the track with ``bbi`` and
    passes an interval iterator to ``fixAndSaveCoverage`` together with the
    output path ``<dataPath>/coverage.bedGraph``.

    Returns whatever ``fixAndSaveCoverage`` returns, or ``None`` when a
    ``KeyError`` is raised while fetching or saving.
    """
    region = task['problem']
    outputPath = os.path.join(dataPath, 'coverage.bedGraph')
    trackUrl = task['trackUrl']

    with bbi.open(trackUrl) as track:
        try:
            intervals = track.fetch_intervals(
                region['chrom'],
                region['chromStart'],
                region['chromEnd'],
                iterator=True,
            )
            # Kept inside the try: a KeyError raised while the iterator is
            # consumed by the save step is swallowed as well.
            return fixAndSaveCoverage(intervals, outputPath, region)
        except KeyError:
            return None
Beispiel #4
0
def runTest(fileUrl, chrom, start, end, bins):
    """Time a binned bbi fetch against the bin/bigWigSummary command-line tool.

    Parameters
    ----------
    fileUrl : path or URL handed to both ``bbi.open`` and the external tool
    chrom : chromosome name
    start, end : range bounds
    bins : number of summary bins requested from both tools

    Returns
    -------
    dict
        Query parameters, start/end timestamps and elapsed time for each
        approach, and the number of values each one produced (``bbiLen``,
        ``subprocessLen``).

    Raises
    ------
    ValueError
        If the range length ``end - start`` is not greater than ``bins``.
    """
    diff = end - start
    if diff <= bins:
        # ValueError is still an Exception, so existing handlers keep working,
        # but the failure now says what was wrong.
        raise ValueError(
            'range length (%s) must be greater than the number of bins (%s)'
            % (diff, bins))

    bbiStartTime = time.time()
    with bbi.open(fileUrl) as file:
        bbiOut = file.fetch(chrom, start, end, bins)
    bbiEndTime = time.time()
    bbiLen = len(bbiOut)

    subprocessStartTime = time.time()
    subprocessOut = subprocess.run(
        ['bin/bigWigSummary', fileUrl, chrom,
         str(start),
         str(end),
         str(bins)],
        stdout=subprocess.PIPE).stdout.decode('utf-8')
    floatData = []
    for token in subprocessOut.split():
        # Tokens that do not parse as floats are skipped (presumably
        # placeholders for bins without data - confirm against the tool).
        try:
            floatData.append(float(token))
        except ValueError:
            continue
    data = np.array(floatData, dtype=np.double)
    # Output parsing is deliberately part of the timed region.
    subprocessEndTime = time.time()
    subprocessLen = len(data)

    return {
        'chrom': chrom,
        'start': start,
        'end': end,
        'diff': diff,
        'bins': bins,
        'bbiStartTime': bbiStartTime,
        'bbiEndTime': bbiEndTime,
        'bbiTimeDiff': bbiEndTime - bbiStartTime,
        'bbiLen': bbiLen,
        'subprocessStartTime': subprocessStartTime,
        'subprocessEndTime': subprocessEndTime,
        'subprocessTimeDiff': subprocessEndTime - subprocessStartTime,
        'subprocessLen': subprocessLen
    }
Beispiel #5
0
def runTest(fileUrl, chrom, start, end, bins):
    """Time a binned bbi fetch against the ./bigWigSummary command-line tool.

    Parameters
    ----------
    fileUrl : path or URL handed to both ``bbi.open`` and the external tool
    chrom : chromosome name
    start, end : range bounds
    bins : number of summary bins requested from both tools

    Returns
    -------
    dict
        Query parameters, start/end timestamps and elapsed time for each
        approach, the length of the bbi result (``bbiLen``) and the number of
        whitespace-separated tokens printed by the tool (``osLen``).

    Raises
    ------
    ValueError
        If the range length ``end - start`` is not greater than ``bins``.
    """
    diff = end - start
    if diff <= bins:
        # ValueError is still an Exception, so existing handlers keep working,
        # but the failure now says what was wrong.
        raise ValueError(
            'range length (%s) must be greater than the number of bins (%s)'
            % (diff, bins))

    bbiStartTime = time.time()
    with bbi.open(fileUrl) as file:
        bbiOut = file.fetch(chrom, start, end, bins)
    bbiEndTime = time.time()
    bbiLen = len(bbiOut)

    osStartTime = time.time()
    osOut = subprocess.run(
        ['./bigWigSummary', fileUrl, chrom,
         str(start),
         str(end),
         str(bins)],
        stdout=subprocess.PIPE).stdout.decode('utf-8')
    osEndTime = time.time()
    # Counted after the clock stops, so tokenising is not part of the timing.
    osLen = len(osOut.split())

    return {
        'chrom': chrom,
        'start': start,
        'end': end,
        'diff': diff,
        'bins': bins,
        'bbiStartTime': bbiStartTime,
        'bbiEndTime': bbiEndTime,
        'bbiTimeDiff': bbiEndTime - bbiStartTime,
        'bbiLen': bbiLen,
        'osStartTime': osStartTime,
        'osEndTime': osEndTime,
        'osTimeDiff': osEndTime - osStartTime,
        'osLen': osLen
    }
Beispiel #6
0
 def __init__(self, file, **kwargs):
     """Set up the track's properties and open its file with bbi.

     The properties start from a copy of ``BigWig.DEFAULT_PROPERTIES``, then
     ``file`` and any extra keyword arguments are layered on top before being
     forwarded to the parent initialiser.
     """
     settings = BigWig.DEFAULT_PROPERTIES.copy()
     settings['file'] = file
     settings.update(kwargs)
     super().__init__(**settings)

     # Imported here rather than at module level, so bbi is only needed once
     # an instance is actually created.
     import bbi

     # self.properties is presumably populated by the parent initialiser.
     self.bw = bbi.open(self.properties['file'])