Esempi in Python per BedTool.all_hits

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: pybedtools

Classe/tipologia: BedTool

Metodo/funzione: all_hits

Esempi su hotexamples.com: 3

BedTool.all_hits in Python: 3 esempi trovati. Questi sono i migliori esempi reali in Python per pybedtools.BedTool.all_hits, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

BedTool(30)

intersect(30)

from_dataframe(30)

filter(23)

cat(17)

closest(15)

count(14)

coverage(13)

field_count(12)

each(10)

flank(8)

_tmp(8)

map(7)

bam_to_bed(7)

genome_coverage(5)

jaccard(5)

groupby(4)

bed6(3)

head(3)

bam_to_fastq(2)

columns(2)

all_hits(2)

iterrows(1)

items(1)

introns(1)

__iter__(1)

index(1)

getfasta(1)

_tabixed(1)

__new__(1)

__str__(1)

chrom(1)

features(1)

append(1)

drop(1)

delete_temporary_history(1)

__getitem__(1)

at(1)

cluster(1)

cut(1)

Esempio n. 1

Mostra file

File: permethbed.py Progetto: luizirber/WGBS_Tools

def roi_meth(in_bed_prefixes, in_sample_list, out_table, mask_file, roi_file,
             min_read_count, min_cpg_count, min_file_count, raw_data_name,
             thread_count):
    """
    Creates a table with the methylation across desired Regions of
    Interest (ROI).

    One row is written per ROI (chrom, start, end, name) followed by one
    column per sample holding methylated/total reads formatted to three
    decimals, or ``NA`` when the sample does not meet the minimums.

    :param in_bed_prefixes: list of bed file prefixes. For every chromosome
                            the file ``<prefix><chrom>.bed`` (or
                            ``.bed.gz``) must exist, otherwise the
                            chromosome is skipped.
    :param in_sample_list: list of sample names. Order corresponds with
                           in_bed_prefixes.
    :param out_table: name of output table file.
    :param mask_file: bed or gtf file that will contain areas masked from
                      analysis (ie: any areas in this file will be ignored).
                      An empty string means no mask.
    :param roi_file: bed or gtf file containing the areas of the genome you
                     want analyzed.
    :param min_read_count: minimum read count necessary for a region of
                           interest. If a sample has less than this read count,
                           NA will be input instead of the average methylation
                           over the ROI.
    :param min_cpg_count: minimum CpG count necessary for a region of
                          interest. If a sample has less than this CpG count,
                          NA will be input instead of the average methylation
                          over the ROI.
    :param min_file_count: minimum file count to keep a region of interest. If
                           less than this many files/samples meet the
                           previous minimum requirements, that roi will not
                           have output in your out_table file.
    :param raw_data_name: file that (if not an empty string) will be the
                          output of methylated and total read counts for each
                          sample. The minimums still apply and will work the
                          same as the main file.
    :param thread_count: int designating threads to allocate for multithreading.
    :return: Nothing
    """
    # Reduces thread count if there aren't enough tasks to fill all threads
    thread_count = min(thread_count, len(in_bed_prefixes))

    base_header = 'chrom\tstart\tend\tname'
    outfile = open(out_table, 'wb')
    raw_data = None
    # try/finally guarantees both output files are flushed and closed even
    # if an error occurs part-way through the analysis.
    try:
        sample_cols = ''.join('\t{}'.format(samp) for samp in in_sample_list)
        outfile.write('{}{}\n'.format(base_header, sample_cols))
        if raw_data_name != "":
            raw_data = open(raw_data_name, 'wb')
            raw_cols = ''.join(
                '\t{0}_methylated\t{0}_total\t{0}_cpgs'.format(samp)
                for samp in in_sample_list)
            raw_data.write('{}{}\n'.format(base_header, raw_cols))

        roi = BedTool(roi_file)
        if mask_file != "":
            mask = BedTool(mask_file)
        else:
            # Placeholder mask on a chromosome name that is not expected to
            # occur, so that nothing is masked.
            mask = BedTool([('chrNONE', 0, 0)])

        # Get chromosome names in ROI file (first-seen order, no duplicates)
        logging.info('Loading chromosomes:')
        chrom_names_tmp = []
        for line in roi:
            chrom = utilities.show_value(line.chrom)
            if chrom not in chrom_names_tmp:
                chrom_names_tmp.append(chrom)

        # Remove chromosome names without accompanying PerMeth file
        chrom_names = []
        for chrom in chrom_names_tmp:
            keepchrom = True
            # Every sample is checked (no early exit) so that each missing
            # file gets its own message.
            for pm_sample in in_bed_prefixes:
                permeth_name = '{}{}.bed'.format(pm_sample, chrom)
                if not os.path.exists(permeth_name):
                    permeth_name = '{}{}.bed.gz'.format(pm_sample, chrom)
                    if not os.path.exists(permeth_name):
                        print('Cannot access a file {} for {}, skipping!'
                              .format(permeth_name, chrom))
                        keepchrom = False
            if keepchrom:
                chrom_names.append(chrom)

        # Loop through, gather information, and print each chrom info
        for chrom in chrom_names:
            # Create methylation dictionary for chromosomal ROI
            roi_chrom = roi.all_hits(BedTool([(chrom, 0, 999999999)])[0])
            meth_dict = utilities.nested_dict(4, str)
            for feature in roi_chrom:
                meth_dict[feature.start][feature.end]['name'] = feature.name
            proc_list = list(in_bed_prefixes)

            def worker():
                """Worker for multithreading that analyzes a chromosome."""
                while True:
                    # Another thread may empty the list between an emptiness
                    # check and the pop, so rely on the pop itself.
                    try:
                        pm_prefix = proc_list.pop()
                    except IndexError:
                        return
                    chrom_meth(pm_prefix, chrom, roi_chrom, mask, meth_dict)

            threads = [Thread(target=worker) for _ in range(thread_count)]
            for thread in threads:
                thread.start()
            # All workers finish before the next chromosome starts, so the
            # closure never sees a later iteration's variables.
            for thread in threads:
                thread.join()

            # Print information into table
            for start in sorted(meth_dict):
                for end in sorted(meth_dict[start]):
                    name = meth_dict[start][end]['name']
                    print_line = '{}\t{}\t{}\t{}'.format(
                        chrom, start, end, name)
                    raw_col_line = print_line
                    file_print_count = 0
                    for pm_sample in in_bed_prefixes:
                        meth = meth_dict[start][end][pm_sample]['meth']
                        total = meth_dict[start][end][pm_sample]['total']
                        cpg = meth_dict[start][end][pm_sample]['cpg']
                        if total >= min_read_count and cpg >= min_cpg_count:
                            # Diagnostic only: report which value is not
                            # numeric before the conversion below raises.
                            try:
                                float(meth)
                            except ValueError:
                                print("Not a float: {}".format(meth))
                            try:
                                float(total)
                            except ValueError:
                                print("Not a float: {}".format(total))
                            meth_perc = float(meth) / float(total)
                            print_line = '{0}\t{1:.3f}'.format(
                                print_line, meth_perc)
                            file_print_count += 1
                        else:
                            print_line = '{0}\tNA'.format(print_line)
                        raw_col_line = '{}\t{}\t{}\t{}'\
                            .format(raw_col_line, meth, total, cpg)
                    print_line = '{}\n'.format(print_line)
                    raw_col_line = '{}\n'.format(raw_col_line)
                    # Only ROIs covered well enough in enough samples are
                    # written out.
                    if file_print_count >= min_file_count:
                        outfile.write(print_line)
                        if raw_data is not None:
                            raw_data.write(raw_col_line)
    finally:
        outfile.close()
        if raw_data is not None:
            raw_data.close()

Esempio n. 2

Mostra file

File: bed_split.py Progetto: jjmini/dmcade

            pass

    def print_split_sort_bed(self):
        """Placeholder that performs no work yet and returns None."""


# Upper bound on the number of splits; lowered below if there are fewer
# regions than this.
split_num = 100

# NOTE(review): `[] * 5` is still an empty list, so this evaluates to `[[]]`
# (a single empty sub-list), not five of them. Kept as-is because the
# intended shape cannot be confirmed from here.
split_region_list = [[] * 5]
print(split_region_list)
bed = BedTool(
    '/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test.bed')

# Sort and merge the input regions, then cut them into windows (w=100).
bed = BedTool(bed.sort().merge().window_maker(b=bed.fn, w=100))

# NOTE: a bare `bed.all_hits()` call used to sit here. `all_hits` requires an
# interval argument, so the call raised TypeError, and its result was unused.

bed.saveas(
    '/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test_w100.bed')

# Count the regions once; each count() call walks the whole file.
region_count = bed.count()
split_num = region_count if region_count < split_num else split_num

# `//` keeps the integer division the Python 2 `/` performed on ints.
print(region_count // split_num)

n = 0
for region in bed:

Esempio n. 3

Mostra file

File: bed_split.py Progetto: huangzhibo/HelloWorld

            pass


    def print_split_sort_bed(self):
        """Placeholder that performs no work yet and returns None."""

# Upper bound on the number of splits; lowered below if there are fewer
# regions than this.
split_num = 100

# NOTE(review): `[] * 5` is still an empty list, so this evaluates to `[[]]`
# (a single empty sub-list), not five of them. Kept as-is because the
# intended shape cannot be confirmed from here.
split_region_list = [[]*5]
print(split_region_list)
bed = BedTool('/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test.bed')


# Sort and merge the input regions, then cut them into windows (w=100).
bed = BedTool(bed.sort().merge().window_maker(b=bed.fn, w=100))

# NOTE: a bare `bed.all_hits()` call used to sit here. `all_hits` requires an
# interval argument, so the call raised TypeError, and its result was unused.

bed.saveas('/Users/huangzhibo/workitems/10.testData/testPlatformTJ/bed/test_w100.bed')

# Count the regions once; each count() call walks the whole file.
region_count = bed.count()
split_num = region_count if region_count < split_num else split_num

# `//` keeps the integer division the Python 2 `/` performed on ints.
print(region_count // split_num)

n = 0
for region in bed:
    # print region.length