Python BedTool.columns Examples

Programming Language: Python

Namespace/Package Name: pybedtools

Class/Type: BedTool

Method/Function: columns

Examples at hotexamples.com: 2

Python BedTool.columns - 2 examples found. These are the top-rated real-world Python examples of pybedtools.BedTool.columns extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

BedTool(30)

intersect(30)

from_dataframe(30)

filter(23)

cat(17)

closest(15)

count(14)

coverage(13)

field_count(12)

each(10)

flank(8)

_tmp(8)

map(7)

bam_to_bed(7)

genome_coverage(5)

jaccard(5)

groupby(4)

bed6(3)

head(3)

bam_to_fastq(2)

columns(2)

all_hits(2)

iterrows(1)

items(1)

introns(1)

__iter__(1)

index(1)

getfasta(1)

_tabixed(1)

__new__(1)

__str__(1)

chrom(1)

features(1)

append(1)

drop(1)

delete_temporary_history(1)

__getitem__(1)

at(1)

cluster(1)

cut(1)

Example #1

Show file

    def from_bed(
            cls, bed, location, chrom_size_path, region_dim="region", sort_bed=True
    ):
        """
        Create empty RegionDS from a bed file.

        Parameters
        ----------
        bed
            Either a path to a BED file (``str`` or ``pathlib.Path``) or an
            already loaded table exposing the pandas DataFrame interface.
            The table must have 3 columns (chrom, start, end) or 4 columns
            (chrom, start, end, name). A table passed in is copied, so the
            caller's object is left untouched.
        location
            Directory of the dataset; it is created (including parents) when
            it does not exist and is handed to ``cls`` as an absolute path.
        region_dim
            Name of the region index; also used as the prefix of every
            coordinate (``f"{region_dim}_chrom"`` etc.) and, for 3-column
            input, of the generated region ids (``f"{region_dim}_{i}"``).
        chrom_size_path
            Passed as ``g`` to ``BedTool.sort`` when sorting, and forwarded
            to ``cls``.
        sort_bed
            When ``bed`` is a path, sort it with ``chrom_size_path`` before
            loading. Has no effect when ``bed`` is already a table.

        Returns
        -------
        The object built by ``cls(...)``, after its ``save()`` was called.

        Raises
        ------
        ValueError
            If the table has neither 3 nor 4 columns.
        """

        if isinstance(bed, (str, pathlib.Path)):
            bed_tool = BedTool(bed)
            if sort_bed:
                # sort bed based on chrom_size_path
                bed_tool = bed_tool.sort(g=chrom_size_path)
            # Convert in both branches: everything below needs a DataFrame,
            # a bare BedTool has no ``shape``/``columns``.
            bed = bed_tool.to_dataframe()
        else:
            # Work on a copy so the index/column changes below do not leak
            # into the caller's table.
            bed = bed.copy()

        n_cols = bed.shape[1]
        if n_cols == 3:
            # No name column: derive region ids from the row labels.
            bed.index = bed.index.map(lambda i: f"{region_dim}_{i}")
        elif n_cols == 4:
            # The fourth column carries the region ids.
            bed = bed.set_index(bed.columns[3])
        else:
            raise ValueError(
                "bed file need to be either 3 columns (chrom, start, end) "
                "or 4 columns (chrom, start, end, name)"
            )
        bed.index.name = region_dim
        bed.columns = ["chrom", "start", "end"]

        # One coordinate per remaining column, all along the region index.
        ds = xr.Dataset({})
        for k, v in bed.items():
            key = f"{region_dim}_{k}"
            ds.coords[key] = v
            if ds.coords[key].dtype == "object":
                # Replace object-dtype coordinates with a string dtype.
                ds.coords[key] = ds.coords[key].astype(str)

        location = pathlib.Path(location).absolute()
        location.mkdir(exist_ok=True, parents=True)
        region_ds = cls(
            ds,
            region_dim=region_dim,
            location=location,
            chrom_size_path=chrom_size_path,
        )
        region_ds.save()
        return region_ds

Example #2

Show file

File: rawdata.py Project: xiaoyiou/eotools

    def preprocess(self,
                   chrlenPath,
                   genomePath,
                   w=100,
                   upStream=1000,
                   downStream=1000,
                   overlap=0.5,
                   method='mean',
                   col=4,
                   type='bed',
                   n_workers=4):
        """
        Build one per-gene window matrix for every file in ``self.paths``.

        The genome is tiled into windows of width ``w``. Each gene is turned
        into the interval ``[start - upStream, start + downStream)`` and, for
        every input file, the per-window values overlapping that interval are
        collected into one row per gene. Rows of genes whose strand is not
        ``'+'`` are reversed.

        Parameters
        ----------
        chrlenPath
            Passed as ``g`` to ``BedTool.window_maker``.
        genomePath
            Annotation file loaded through ``BedTool(...).to_dataframe()``;
            only rows whose ``feature`` is ``'gene'`` are used.
            NOTE(review): presumably a GFF file - confirm with callers.
        w
            Window width; ``upStream`` and ``downStream`` must be multiples
            of it.
        upStream, downStream
            Extent of the interval around each gene start.
        overlap
            Passed as ``F`` to ``BedTool.map``.
        method
            Passed as ``o`` to ``BedTool.map`` when ``type == 'sigbed'``.
        col
            Passed as ``c`` to ``BedTool.map`` when ``type == 'sigbed'``.
        type
            ``'bed'`` counts features per window (``c=1, o='count'``);
            ``'sigbed'`` aggregates column ``col`` with ``method``.
        n_workers
            Number of threads processing the input files.

        Returns
        -------
        None
            Results are stored in ``self.raw`` (name -> DataFrame with rows
            containing NaN dropped). ``self.genes`` is set from the index of
            the first finished result if it is still ``None``.

        Raises
        ------
        ValueError
            If ``type`` is neither ``'bed'`` nor ``'sigbed'``.
        """

        assert upStream % w == 0 and downStream % w == 0
        if type not in ('bed', 'sigbed'):
            # Previously an unknown type silently passed ``None`` on to
            # ``intersect``; fail early with a clear message instead.
            raise ValueError("type must be either 'bed' or 'sigbed'")

        def attribute_value(attributes):
            # Text between the first '=' and the first ';'. When there is no
            # ';' take everything after '=' (str.find's -1 would otherwise
            # chop off the last character).
            begin = attributes.find('=') + 1
            stop = attributes.find(';')
            return attributes[begin:] if stop == -1 else attributes[begin:stop]

        window = BedTool().window_maker(g=chrlenPath, w=w)
        genes = BedTool(genomePath).to_dataframe()
        # Explicit copies keep the column assignments below from operating on
        # a view of the filtered frame.
        genes = genes[genes['feature'] == 'gene'][[
            'seqname', 'start', 'strand', 'attributes'
        ]].copy()
        genes['attributes'] = genes['attributes'].apply(attribute_value)
        genes['start'] = genes['start'] - upStream
        # start was already shifted, so this is original start + downStream.
        genes['end'] = genes['start'] + upStream + downStream
        genes = genes[['seqname', 'start', 'end', 'attributes', 'strand']]
        genes.columns = ['chrom', 'start', 'end', 'ID', 'strand']
        # Drop genes whose upstream flank would start before position 0.
        genes = genes[genes.start >= 0].copy()
        # Lower-case the first character of the chromosome names.
        genes['chrom'] = genes['chrom'].apply(lambda x: x[0].lower() + x[1:])
        atlas = BedTool.from_dataframe(genes[['chrom', 'start', 'end',
                                              'ID']]).sort()
        genes = genes.set_index(['ID'])

        def worker(atlas, window, path, genes, col, method, overlap, kind):
            # Summarise one input file per window, then gather the windows of
            # every gene interval into one row.
            p = BedTool(path).sort()
            if kind == 'bed':
                a = window.map(p, c=1, o='count', F=overlap)
            else:  # 'sigbed' - validated by the caller
                a = window.map(p, o=method, c=col, F=overlap)
            tmp = atlas.intersect(a, loj=True, wa=True, wb=True).to_dataframe()
            # Scalar key so that get_group() accepts the plain gene id.
            grps = tmp.groupby('name')
            data = []
            for ind in genes.index:
                # NOTE(review): 'thickEnd' is presumably the per-window value
                # appended by map(); confirm against to_dataframe()'s naming.
                row = grps.get_group(ind)['thickEnd'].tolist()
                # .ix was removed in pandas 1.0; .loc is the label lookup.
                data.append(
                    row if genes.loc[ind, 'strand'] == '+' else row[::-1])
            return pd.DataFrame(data, index=genes.index.tolist())

        self.raw = {}
        with futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
            jobs = {}
            for i, path in enumerate(self.paths):
                job = executor.submit(worker, atlas, window, path, genes, col,
                                      method, overlap, type)
                jobs[job] = self.names[i]

            for job in futures.as_completed(jobs):
                self.raw[jobs[job]] = job.result().dropna()
                if self.genes is None:
                    self.genes = self.raw[jobs[job]].index.tolist()