def test_RNA_position_placement_split(self):
     """
     Check RNA_position on genes made of two regions, for both strands.

     Each case pairs a BED-style line with the strand and region list of
     ENSMUSG1; both cases are expected to give (.50, .75).
     """
     cases = (
         ("chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    +    125    125",
          "+",
          [(0, 50), (100, 150)]),
         ("chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    25    25",
          "-",
          [(100, 150), (0, 50)]),
     )

     for bed_line, strand, regions in cases:
         tool = pybedtools.create_interval_from_list(bed_line.split())
         location_dict = {"ENSMUSG1": {"strand": strand, "regions": regions}}
         self.assertEqual(RNA_position(tool, location_dict), (.50, .75))
 def test_RNA_position_placement(self):
     """
     Check the placement RNA_position reports inside a single region.

     The same kind of interval is evaluated against one (0, 100) region,
     once on the '+' strand and once on the '-' strand.
     """
     # (BED-style line, strand, expected (individual_fraction, total_fraction))
     cases = (
         ("chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    +    60    60",
          "+",
          (.60, .60)),
         ("chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    60    60",
          "-",
          (.4, .4)),
     )

     for bed_line, strand, expected in cases:
         tool = pybedtools.create_interval_from_list(bed_line.split())
         location_dict = {"ENSMUSG1": {"strand": strand, "regions": [(0, 100)]}}
         self.assertEqual(RNA_position(tool, location_dict), expected)
Example #3
0
def create_bed_tool_from_miso_a5ss(miso_annotation, is_alt=True):
    """
    Deprecated function

    Turn one colon-separated MISO A5SS exon description into interval(s).

    With ``is_alt`` equal to True the annotation looks like
    ``chr2:183800103:183799993|183800021:-`` and a ``(splice1, splice2)``
    pair is returned; otherwise it looks like ``chr17:80008538:80008640:-``
    and a single interval is returned.  Starts are shifted down by one.
    """
    chrom, start, end, strand = miso_annotation.split(':')

    # Equality with True (not plain truthiness) is what the original tested.
    if not (is_alt == True):
        return bt.create_interval_from_list(
            [chrom, int(start) - 1, end, '0', '0', strand])

    end1, end2 = end.split('|')
    if strand == '+':
        first_fields = [chrom, int(start) - 1, end1, '0', '0', strand]
    else:
        first_fields = [chrom, int(end2) - 1, start, '0', '0', strand]
    splice1 = bt.create_interval_from_list(first_fields)

    # The middle piece is built the same way on either strand.
    splice2 = bt.create_interval_from_list(
        [chrom, int(end1) - 1, end2, '0', '0', strand])

    return splice1, splice2
Example #4
0
def test_pickleable():
    """
    Round-trip intervals through pickle and compare their string forms.

    Three field lists are exercised: a 4-field line, a 3-field line and a
    9-field tab-separated GFF-style line.
    """
    import pickle

    field_sets = [
        ['chr1', '1', '100', 'asdf'],
        ['chr1', '1', '100'],
        "chr2L\t.\tUTR\t41\t70\t0\t+\t.\tID=mRNA:xs2:UTR:41-70;Parent=mRNA:xs2;".split('\t'),
    ]

    for fields in field_sets:
        interval = pybedtools.create_interval_from_list(fields)
        fn = pybedtools.BedTool._tmp()
        # Pickle streams are bytes: binary mode is required on Python 3 and
        # harmless on Python 2.  ``with`` closes both handles, including the
        # read handle the original left open.
        with open(fn, 'wb') as out:
            pickle.dump(interval, out)
        with open(fn, 'rb') as handle:
            new_interval = pickle.load(handle)
        assert str(interval) == str(new_interval)
Example #5
0
    def test_second_start_segmentation(self):
        """
        Check xlsites._second_start against a two-gene segmentation.

        The segmentation holds one '+' exon (100-200) under G001/T0001 and
        one '-' exon (50-100) under G002/T0002; three position tuples are
        then checked for the second start they produce.
        """
        plus_exon = pybedtools.create_interval_from_list(
            ['1', '.', 'exon', '100', '200', '.', '+', '.',
             'gene_id: "G001"', 'transcript_id: "T0001"'])
        minus_exon = pybedtools.create_interval_from_list(
            ['1', '.', 'exon', '50', '100', '.', '-', '.',
             'gene_id: "G001"', 'transcript_id: "T0001"'])
        segmentation = {
            'G001': {'gene_segment': [], 'T0001': [plus_exon]},
            'G002': {'gene_segment': [], 'T0002': [minus_exon]},
        }

        # (positions, strand, expected second start)
        expectations = [
            ((1, 2, 99, 100), '+', 99),
            ((99, 100, 199, 200), '-', 100),
            ((1, 2, 4, 5), '-', 2),
        ]
        for poss, strand, expected in expectations:
            second_start, _ = xlsites._second_start(
                read=0, poss=poss, strand=strand, chrom=1,
                segmentation=segmentation, holesize_th=4)
            self.assertEqual(second_start, expected)
Example #6
0
def test_pickleable():
    """
    Pickle and unpickle several intervals, checking ``str()`` is preserved.

    The inputs are a 4-field line, a 3-field line and a 9-field
    tab-separated GFF-style line.
    """
    import pickle

    def _round_trip(interval):
        # Binary mode is mandatory for pickle on Python 3 (and fine on 2);
        # the context managers make sure neither handle is leaked.
        fn = pybedtools.BedTool._tmp()
        with open(fn, 'wb') as out:
            pickle.dump(interval, out)
        with open(fn, 'rb') as handle:
            return pickle.load(handle)

    for fields in (
            ['chr1', '1', '100', 'asdf'],
            ['chr1', '1', '100'],
            "chr2L\t.\tUTR\t41\t70\t0\t+\t.\tID=mRNA:xs2:UTR:41-70;Parent=mRNA:xs2;".split(
                '\t'),
    ):
        interval = pybedtools.create_interval_from_list(fields)
        new_interval = _round_trip(interval)
        assert str(interval) == str(new_interval)
Example #7
0
 def finder(self, region):
     """
     Record regex hits in ``self.current_seq`` and yield a marked-up string.

     Every match of ``self.regex_plus`` / ``self.regex_minus`` is appended to
     ``self.intervals`` as a 6-field interval whose coordinates are the match
     span offset by ``region.start``.  A single string is yielded at the end:
     a copy of the sequence layout with '.' everywhere except where hits and
     their strand labels were written.
     """
     # One '.' placeholder per character of the current sequence.
     s = ['.' for i in self.current_seq]
     for hit in self.regex_plus.finditer(self.current_seq):
         start, stop = hit.span()
         # Show the matched text in place of the dots.
         s[start:stop] = hit.group()
         strand = ' (+)'
         # NOTE(review): the slice ends at len(strand) (4), not at
         # stop + len(strand); when stop > 4 this inserts the label and
         # lengthens the list instead of overwriting -- confirm intent.
         s[stop:len(strand)] = strand
         self.intervals.append(pybedtools.create_interval_from_list([
             region.chrom,
             str(region.start + start),
             str(region.start + stop),
             hit.group(),
             '0',
             '+']))
     for hit in self.regex_minus.finditer(self.current_seq):
         start, stop = hit.span()
         # The display gets the reverse complement, while the interval name
         # below keeps the text exactly as matched.
         s[start:stop] = Seq(hit.group()).reverse_complement()
         strand = ' (-)'
         # NOTE(review): same slice-end question as in the '+' loop above.
         s[stop:len(strand)] = strand
         self.intervals.append(pybedtools.create_interval_from_list([
             region.chrom,
             str(region.start + start),
             str(region.start + stop),
             hit.group(),
             '0',
             '-']))
     # Generator with exactly one item: the assembled display string.
     yield ''.join(s)
 def test_convert_to_mRNA_position_placement(self):
     
     """
     
     Makes sure that the placement within a region or list of regions is correct
     
     """
     
     return
     interval = pybedtools.create_interval_from_list("ENSMUSG1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    +    60    60".split())
     location_dict = {"ENSMUSG1" : {"strand" : "+", "regions" : [(0,100),
                                                                 ] 
                                    }
                      }
     
     correct_tool = pybedtools.create_interval_from_list("ENSMUSG1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    +    60    60".split())
     self.assertEqual(convert_to_mRNA_position(interval, location_dict), correct_tool)
     
     interval = pybedtools.create_interval_from_list("ENSMUSG1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -".split())
     location_dict = {"ENSMUSG1" : {"strand" : "-", "regions" : [(0,100),
                                                                 ] 
                                    }
                      }
     
     #individual_fraction, total_fraction
     correct_tool = pybedtools.create_interval_from_list("ENSMUSG1    40    50    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -".split())
     x =  convert_to_mRNA_position(interval, location_dict)
     print x
     self.assertEqual(x, correct_tool)
Example #9
0
    def test_RNA_position_placement(self):
        """
        Verify RNA_position for an interval inside one (0, 100) region.

        Both strands are checked; the expected tuples are
        (individual_fraction, total_fraction).
        """

        def position_for(bed_line, strand):
            # Build the interval and a one-region location dict, then query.
            tool = pybedtools.create_interval_from_list(bed_line.split())
            location_dict = {
                "ENSMUSG1": {"strand": strand, "regions": [(0, 100)]},
            }
            return RNA_position(tool, location_dict)

        self.assertEqual(
            position_for(
                "chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    +    60    60",
                "+"),
            (.60, .60))

        self.assertEqual(
            position_for(
                "chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    60    60",
                "-"),
            (.4, .4))
Example #10
0
    def test_fix_bed6_empytname3(self):
        """A '.' entry inside the name 'b,.,a' should be dropped, giving 'b,a'."""
        input_fields = ['1', '1', '10', 'b,.,a', '5', '+']
        expected_fields = ['1', '1', '10', 'b,a', '5', '+']

        converted = clusters._fix_bed6_emptyname(
            create_interval_from_list(input_fields))
        expected = create_interval_from_list(expected_fields)

        self.assertEqual(expected, converted)
Example #11
0
def bedtool_from_renamed_twobed_index2(name, stream):
    """
    WARNING THIS IS ONLY GOOD FOR PHASTCON MASK FUNCTION

    Pick one of the two 6-field records packed into ``name``.

    Parameters
    ----------
    name : str
        Twelve tab-separated fields: a "low" record (chrom, start, end,
        name, score, strand) followed by a "hi" record of the same shape.
    stream : str
        Either 'upstream' or 'downstream'.

    Returns
    -------
    The interval built by ``pybedtools.create_interval_from_list``:
    for 'upstream' the low record when both strands are '+', for
    'downstream' the low record when both strands are '-', and the hi
    record in every other case.

    Raises
    ------
    ValueError
        If ``name`` does not hold exactly twelve fields, or if ``stream``
        is neither 'upstream' nor 'downstream' (previously the latter fell
        through to an UnboundLocalError).
    """
    low_chrom, low_start, low_end, low_name, low_score, low_strand, \
    hi_chrom, hi_start, hi_end, hi_name, hi_score, hi_strand = name.split('\t')

    # Strand that both records must share for the low record to be chosen.
    if stream == 'upstream':
        low_wins_on = '+'
    elif stream == 'downstream':
        low_wins_on = '-'
    else:
        raise ValueError(
            "stream must be 'upstream' or 'downstream', got {!r}".format(stream))

    if low_strand == low_wins_on and hi_strand == low_wins_on:
        chosen = [low_chrom, low_start, low_end, low_name, low_score, low_strand]
    else:
        chosen = [hi_chrom, hi_start, hi_end, hi_name, hi_score, hi_strand]
    return pybedtools.create_interval_from_list(chosen)
Example #12
0
def create_bed_tool_from_miso_a3ss(miso_annotation, is_alt=True):
    """
    Deprecated function

    Turn one colon-separated MISO A3SS exon description into interval(s).

    Parameters
    ----------
    miso_annotation : str
        With ``is_alt`` equal to True, ``chrom:start1|start2:end:strand``,
        e.g. ``chr2:55771074|55771161:55771210:+`` or
        ``chr1:43830128|43830131:43829995:-``.  Otherwise
        ``chrom:start:end:strand``, e.g. ``chr17:80008538:80008640:-``.
    is_alt : bool
        Selects which of the two formats is parsed.

    Returns
    -------
    ``(splice1, splice2)`` when ``is_alt`` equals True, else one interval.
    Starts are shifted down by one.

    Raises
    ------
    ValueError
        If the annotation does not split into the expected pieces, or (alt
        format) the strand is neither '+' nor '-'.  The latter used to
        surface as an UnboundLocalError at the return statement.
    """
    chrom, start, end, strand = miso_annotation.split(':')

    # Equality with True (not truthiness) is kept from the original.
    if not (is_alt == True):
        return bt.create_interval_from_list(
            [chrom, int(start) - 1, end, '0', '0', strand])

    start1, start2 = start.split('|')
    if strand not in ('+', '-'):
        raise ValueError(
            "unsupported strand {!r}; expected '+' or '-'".format(strand))

    # The piece between the two alternative starts is identical on both strands.
    splice1 = bt.create_interval_from_list(
        [chrom, int(start1) - 1, start2, '0', '0', strand])
    if strand == '+':
        # the downstream one
        splice2 = bt.create_interval_from_list(
            [chrom, int(start2) - 1, end, '0', '0', strand])
    else:
        splice2 = bt.create_interval_from_list(
            [chrom, int(end) - 1, start1, '0', '0', strand])
    return splice1, splice2
Example #13
0
def string_to_interval(s):
    """
    Convert string of the form "chrom:start-stop" or "chrom:start-stop[strand]"
    to an interval.

    Assumes zero-based coords.

    If it's already an interval (anything that is not a string), then return
    it as-is.

    Raises
    ------
    ValueError
        If ``s`` is a string that ``coord_re`` does not match.  Previously
        this surfaced as an AttributeError on ``None.group``.
    """
    if not isinstance(s, basestring):
        return s

    m = coord_re.search(s)
    if m is None:
        raise ValueError(
            "cannot parse %r as chrom:start-stop[strand]" % (s,))

    fields = [m.group('chrom'), m.group('start'), m.group('stop')]
    strand = m.group('strand')
    if strand:
        # A strand needs the name and score columns filled in before it.
        fields.extend(['.', '0', strand])
    return pybedtools.create_interval_from_list(fields)
 def test_convert_to_mRNA_position_placement_split(self):    
     
     """
     
     Makes sure that lists of regions works for both positive and negative strands
     
     """
     
     return
     tool = pybedtools.create_interval_from_list("ENSMUSG1    125    127    ENSMUSG1_1_83;ENSMUSG1_6_83    0    +    125    125".split())
     location_dict = {"ENSMUSG1" : {"strand" : "+", "regions" : [(0, 50),
                                                                 (100, 150),
                                                                 ] 
                                    }
                      }
     
     correct_tool = pybedtools.create_interval_from_list("ENSMUSG1    75    77    ENSMUSG1_1_83;ENSMUSG1_6_83    0    +    125    125".split())
     self.assertEqual(convert_to_mRNA_position(tool, location_dict), correct_tool )
     
     tool = pybedtools.create_interval_from_list("ENSMUSG1    25    27    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    25    25".split())
     location_dict = {"ENSMUSG1" : {"strand" : "-", "regions" : [(100, 150),
                                                                 (0, 50),
                                                                 ] 
                                    }
                      }
     
     correct_tool = pybedtools.create_interval_from_list("ENSMUSG1    73    75    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    25    25".split())
     self.assertEqual(convert_to_mRNA_position(tool, location_dict), correct_tool)
Example #15
0
def string_to_interval(s):
    """
    Convert string of the form "chrom:start-stop" or "chrom:start-stop[strand]" to an interval.

    Assumes zero-based coords.

    If it's already an interval (i.e. not a string), then return it as-is.

    Raises
    ------
    ValueError
        If ``s`` is a string that ``coord_re`` fails to match; the original
        code failed with an AttributeError on the missing match object.
    """
    if isinstance(s, basestring):
        m = coord_re.search(s)
        if m is None:
            raise ValueError(
                "cannot parse %r as chrom:start-stop[strand]" % (s,))

        fields = [m.group(key) for key in ('chrom', 'start', 'stop')]
        if m.group('strand'):
            # Placeholder name and score go before the strand column.
            fields += ['.', '0', m.group('strand')]
        return pybedtools.create_interval_from_list(fields)
    return s
Example #16
0
def intersection2gff(intersection):
    """
    Split one intersection record into its two feature intervals.

    The record's string form is tab-split; the first nine columns form the
    first interval.  If column 10 (index 9) is '.', the second element of
    the returned pair is None, otherwise it is an interval built from the
    remaining columns.
    """
    columns = str(intersection).strip().split("\t")
    partner_missing = columns[9] == '.'

    first = create_interval_from_list(columns[:9])
    if partner_missing:
        return first, None
    return first, create_interval_from_list(columns[9:])
Example #17
0
    def get_bedtools(self):
        """
        Build the (upstream, downstream) interval pair for a 'twobed' record.

        ``self.annotation`` is split on tabs into a lower and an upper
        6-field record.  When both strands are '+', lower is upstream; when
        both are '-', upper is upstream.  Any other strand combination
        prints a warning and returns -1.  For a ``self.source`` other than
        'twobed' nothing is built and None is returned.
        """
        if self.source != 'twobed':
            return None

        (lo_chrom, lo_start, lo_end, lo_name, lo_score, lo_strand,
         up_chrom, up_start, up_end, up_name, up_score, up_strand) = \
            self.annotation.split('\t')

        both_plus = lo_strand == '+' and up_strand == '+'
        both_minus = lo_strand == '-' and up_strand == '-'
        if not (both_plus or both_minus):
            print("Warning, strand not correct!")
            return -1

        lower = bt.create_interval_from_list(
            [lo_chrom, lo_start, lo_end, lo_name, lo_score, lo_strand])
        upper = bt.create_interval_from_list(
            [up_chrom, up_start, up_end, up_name, up_score, up_strand])

        if both_plus:
            return lower, upper
        return upper, lower
    def test_convert_to_mRNA_position_fail(self):
        """ Various attempts to break RNA position and make sure that error are caught """
        return
        tool = pybedtools.create_interval_from_list(
            "ENSMUSG1    51    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    10    10"
            .split())
        location_dict = {
            "ENSMUSG1": {
                "strand": "-",
                "regions": [
                    (100, 150),
                    (25, 50),
                ]
            }
        }

        self.assertEqual(
            convert_to_mRNA_position(tool, location_dict).chrom, "none")

        tool = pybedtools.create_interval_from_list(
            "ENSMUSG1    51    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    175    175"
            .split())

        self.assertEqual(
            convert_to_mRNA_position(tool, location_dict).chrom, "none")

        pybedtools.BedTool("chr1    x     y    ")
Example #19
0
def test_all_hits():
    """Check ``all_hits`` for one query, with and without ``same_strand``."""
    a = pybedtools.example_bedtool('a.bed')

    def query():
        # Fresh '-' strand query interval for each call.
        return pybedtools.create_interval_from_list(
            ['chr1', '450', '905', '.', '.', '-'])

    expected_any_strand = [a[2], a[3]]
    assert expected_any_strand == a.all_hits(query())

    expected_same_strand = [a[2]]
    assert expected_same_strand == a.all_hits(query(), same_strand=True)
Example #20
0
 def test_minus_feature_is_reversed_profile(self):
     """
     The '-' strand feature should give the '+' strand profile reversed.

     x values must match as-is; y values must match after reversal.
     """
     plus_feature = pybedtools.create_interval_from_list(
         ['chr2L', '0', '20', '.', '.', '+'])
     minus_feature = pybedtools.create_interval_from_list(
         ['chr2L', '0', '20', '.', '.', '-'])

     x, y = self.m.local_coverage(plus_feature, fragment_size=5)
     xm, ym = self.m.local_coverage(minus_feature, fragment_size=5)

     # Side-by-side dump of minus/plus values to help when debugging.
     pp(list(enumerate(zip(ym, y))))

     reversed_plus = list(y[::-1])
     assert list(ym) == reversed_plus
     assert list(xm) == list(x)
Example #21
0
def test_count_hits():
    """Count the hits ``all_hits`` returns with and without ``same_strand``."""
    a = pybedtools.example_bedtool('a.bed')

    def query():
        # Fresh '-' strand query interval for each call.
        return pybedtools.create_interval_from_list(
            ['chr1', '450', '905', '.', '.', '-'])

    any_strand_hits = a.all_hits(query())
    assert len(any_strand_hits) == 2

    same_strand_hits = a.all_hits(query(), same_strand=True)
    assert len(same_strand_hits) == 1
Example #22
0
def _add_biotype_attribute(gene_content):
    """
    Add `biotype` attribute to all intervals in gene_content.

    For each transcript the biotype is taken from its first 'CDS' or
    'ncRNA' interval: `transcript_biotype` if present, else `gene_biotype`
    if present, else the value in column 2 (index 1).  The last option can
    only happen in some of early ensembl releases.

    The 'gene' interval receives a bracketed, comma-separated list of every
    biotype seen, most frequent first and alphabetical among ties.

    Parameters
    ----------
    gene_content : dict
        Intervals in gene separated by transcript id, plus a 'gene' entry.

    Returns
    -------
    dict
        Shallow copy of gene_content whose intervals are rebuilt with the
        added `biotype` attribute.
    """
    annotated = gene_content.copy()

    # Start the tally with the gene-level biotype, when there is one.
    gene_level = annotated['gene'].attrs.get('gene_biotype', None)
    seen_biotypes = [gene_level] if gene_level else []

    for transcript_id, transcript_intervals in annotated.items():
        if transcript_id == 'gene':
            continue

        candidates = [i for i in transcript_intervals if i[2] in ['CDS', 'ncRNA']]
        exon = candidates[0]
        gene_bt = exon.attrs.get('gene_biotype', None)
        transcript_bt = exon.attrs.get('transcript_biotype', None)
        biotype = transcript_bt or gene_bt or exon[1]
        seen_biotypes.append(biotype)

        suffix = ' biotype "{}";'.format(biotype)
        annotated[transcript_id] = [
            create_interval_from_list(
                # A lone '.' attribute column is treated as empty.
                interval[:8] + [('' if interval[8] == '.' else interval[8]) + suffix])
            for interval in transcript_intervals
        ]

    # Rank by descending count, breaking ties alphabetically.
    ranked = sorted(Counter(seen_biotypes).items(),
                    key=lambda pair: (-pair[1], pair[0]))
    biotype = ', '.join([name for name, _count in ranked])

    gene_interval = annotated['gene']
    annotated['gene'] = create_interval_from_list(
        gene_interval[:8] + [gene_interval[8] + ' biotype "[{}]";'.format(biotype)])

    return annotated
Example #23
0
    def test_basic(self):
        """
        Check which gene name landmark.get_gene_name picks for segment pairs.

        Three segments (CDS/G0, UTR3/G1, intron/G2) are compared pairwise;
        passing the same segment twice is expected to raise ValueError.
        """

        def segment(feature_type, attributes):
            return create_interval_from_list(
                ['1', '.', feature_type, '1', '2', '.', '+', '.', attributes])

        seg_level0 = segment('CDS', 'gene_name "G0";')
        seg_level1 = segment('UTR3', 'gene_name "G1";')
        seg_level4 = segment('intron', 'gene_name "G2";')

        pairings = [
            ('G0', seg_level0, seg_level1),
            ('G0', seg_level1, seg_level0),
            ('G1', seg_level1, seg_level4),
        ]
        for expected_name, first, second in pairings:
            self.assertEqual(expected_name, landmark.get_gene_name(first, second))

        with self.assertRaises(ValueError):
            self.assertEqual('B', landmark.get_gene_name(seg_level0, seg_level0))
Example #24
0
def test_all_hits():
    """``all_hits`` should return a[2] and a[3], or only a[2] with same_strand."""
    a = pybedtools.example_bedtool('a.bed')
    query_fields = ('chr1', '450', '905', '.', '.', '-')

    third_and_fourth = [a[2], a[3]]
    hits = a.all_hits(pybedtools.create_interval_from_list(list(query_fields)))
    assert third_and_fourth == hits

    third_only = [a[2]]
    stranded_hits = a.all_hits(
        pybedtools.create_interval_from_list(list(query_fields)),
        same_strand=True)
    assert third_only == stranded_hits
Example #25
0
 def test_minus_feature_is_reversed_profile(self):
     """Coverage of a '-' feature must be the '+' coverage with y reversed."""
     features = {}
     for strand in ('+', '-'):
         features[strand] = pybedtools.create_interval_from_list(
             ['chr2L', '0', '20', '.', '.', strand])

     x, y = self.m.local_coverage(features['+'], fragment_size=5)
     xm, ym = self.m.local_coverage(features['-'], fragment_size=5)

     # Paired minus/plus values, printed for inspection.
     paired = list(enumerate(zip(ym, y)))
     pp(paired)

     assert list(ym) == list(y[::-1])
     assert list(xm) == list(x)
Example #26
0
def test_any_hits():
    """Check ``any_hits`` for an overlapping and a non-overlapping query."""
    a = pybedtools.example_bedtool('a.bed')

    def minus_interval(start, stop):
        return pybedtools.create_interval_from_list(
            ['chr1', start, stop, '.', '.', '-'])

    overlap_any_strand = a.any_hits(minus_interval('900', '905'))
    assert 1 == overlap_any_strand

    overlap_same_strand = a.any_hits(minus_interval('900', '905'),
                                     same_strand=True)
    assert 0 == overlap_same_strand

    far_away = a.any_hits(minus_interval('8000', '9000'))
    assert 0 == far_away
Example #27
0
def get_bedtool_4():
    """
    returns a bedtool containing 2 '+' strand intervals on chr1 that start
    at 881553 and end at 881555 and 881565 respectively, described by the
    original author as lying within the CDS of ENSG00000188976.6

    NOTE(review): by these coordinates the intervals are 2nt and 12nt long;
    the earlier docstring said 10nt for the second -- confirm which is meant.
    """
    interval_ends = ('881555', '881565')
    intervals = [
        pybedtools.create_interval_from_list(
            ['chr1', '881553', end, '.', '0', '+'])
        for end in interval_ends
    ]
    return pybedtools.BedTool(intervals)
def get_jx_region_as_interval_eric(row, x, event='se'):
    """
    returns a BedTools interval given an rmats annotation row spanning
    from the upstream-end to the downstream-start.

    Parameters
    ----------
    row : pandas.core.series.Series
        single row of a rMATS file; ``row['annotation']`` must hold five
        '|'-separated fields starting with chrom and strand, and the exon
        columns used for the event must hold 'start-end' integer pairs
    x : basestring
        name given to the bedtools interval
    event : basestring
        one of 'se', 'mxe', 'ri' (uses low_exon / hi_exon), 'a3ss' (uses
        upstream_exon / short_exon) or 'a5ss' (uses downstream_exon /
        short_exon)

    Returns
    -------
    pybedtools.BedTool.Interval

    Raises
    ------
    ValueError
        If ``event`` is not one of the supported names (previously this
        fell through to an UnboundLocalError), or a field cannot be parsed.
    """
    chrom, strand, _, _, _ = row['annotation'].split('|')

    def _exon_bounds(column):
        # Parse a 'start-end' cell into two ints; fails unless exactly two.
        exon_start, exon_end = [int(ex) for ex in row[column].split('-')]
        return exon_start, exon_end

    if event == 'se' or event == 'mxe' or event == 'ri':
        _low_start, low_end = _exon_bounds('low_exon')
        hi_start, _hi_end = _exon_bounds('hi_exon')
        left, right = low_end, hi_start
    elif event == 'a3ss' or event == 'a5ss':
        flank_column = 'upstream_exon' if event == 'a3ss' else 'downstream_exon'
        flank_start, flank_end = _exon_bounds(flank_column)
        short_start, short_end = _exon_bounds('short_exon')

        # a3ss on '+' and a5ss on any other strand run flank_end -> short_start;
        # the two remaining combinations run short_end -> flank_start.
        if event == 'a3ss':
            flank_first = strand == '+'
        else:
            flank_first = strand != '+'
        if flank_first:
            left, right = flank_end, short_start
        else:
            left, right = short_end, flank_start
    else:
        raise ValueError(
            "unsupported event {!r}; expected one of "
            "'se', 'mxe', 'ri', 'a3ss', 'a5ss'".format(event))

    return bt.create_interval_from_list(
        [chrom, left, right, x, '0', strand])
Example #29
0
def test_gtf_gff_attrs():
    """Smoke test: reading ``attrs`` must not raise for either attribute style."""
    field_sets = [
        # this has always worked:
        ["chr1", "fake", "mRNA", "51", "300", ".", "+", ".", "ID=mRNA1;Parent=gene1;"],
        # this previously failed because of the "=" in the attr string.
        ['scaffold_52', 'Cufflinks', 'exon', '5478', '5568', '.', '+', '.', 'gene_id "XLOC_017766"; transcript_id "TCONS_00033979"; exon_number "6"; gene_name "g18412"; oId "PAC:26897502"; nearest_ref "PAC:26897502"; class_code "="; tss_id "TSS21210"; p_id "P18851";'],
    ]
    for fields in field_sets:
        feature = pybedtools.create_interval_from_list(fields)
        # Attribute access alone is the check; the value is not inspected.
        feature.attrs
Example #30
0
def test_count_hits():
    """Two hits are expected on any strand, one when ``same_strand`` is set."""
    a = pybedtools.example_bedtool('a.bed')
    query_fields = ('chr1', '450', '905', '.', '.', '-')

    for extra_kwargs, expected_count in (({}, 2), ({'same_strand': True}, 1)):
        query = pybedtools.create_interval_from_list(list(query_fields))
        hits = a.all_hits(query, **extra_kwargs)
        assert len(hits) == expected_count
Example #31
0
def test_interval_index():
    """
    supplement to the more general test in test_cbedtools.IntervalTest.testGetItemNegative

    Checks negative indices and slices with negative bounds on a 6-field
    and a 9-field interval.
    """
    iv = pybedtools.create_interval_from_list(
        'chr21   9719768 9721892 ALR/Alpha       1004    +'.split())
    assert iv[-1] == '+'
    assert iv[2:-1] == ['9721892', 'ALR/Alpha', '1004']

    iv = pybedtools.create_interval_from_list(
        ['chr1', 'ucb', 'gene', '465', '805', '.', '+', '.',
         'ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805'])
    # Call form of print: same output on Python 2, and no longer a
    # SyntaxError on Python 3 as the statement form was.
    print(iv[4:-3])
    assert iv[4:-3] == ['805', '.']
Example #32
0
    def _create_window(self):
        """
        Yield sliding-window intervals across ``self.start``..``self.stop``.

        Windows of ``self.size`` are advanced by ``self.slide`` for as long
        as both the window start and the window end lie before
        ``self.stop``; a final window from the current start to
        ``self.stop`` is always yielded afterwards.
        """
        istart = self.start  # interval start
        istop = self.start + self.size  # interval stop
        while istop < self.stop and istart < self.stop:
            # Build a real list: on Python 3 ``map`` returns an iterator,
            # which create_interval_from_list cannot index or slice.
            fields = [str(value) for value in (self.chrom, istart, istop)]
            yield pybedtools.create_interval_from_list(fields)
            istart += self.slide
            istop += self.slide

        # Trailing window, clipped to the end of the region.
        fields = [str(value) for value in (self.chrom, istart, self.stop)]
        yield pybedtools.create_interval_from_list(fields)
Example #33
0
def test_any_hits():
    """``any_hits`` returns 1 for an overlap and 0 otherwise."""
    a = pybedtools.example_bedtool('a.bed')

    # (start, stop, extra keyword arguments, expected result)
    scenarios = (
        ('900', '905', {}, 1),
        ('900', '905', {'same_strand': True}, 0),
        ('8000', '9000', {}, 0),
    )
    for start, stop, extra_kwargs, expected in scenarios:
        query = pybedtools.create_interval_from_list(
            ['chr1', start, stop, '.', '.', '-'])
        assert expected == a.any_hits(query, **extra_kwargs)
Example #34
0
    def values(self, chrom, start, end, strand, flatten=False):
        """
        Accumulate peak overlap over every position of a region.

        Parameters
        ----------
        chrom : basestring
            (eg. chr1)
        start : int
            0-based start (first position in chromosome is 0)
        end : int
            1-based end (last position is not included)
        strand : str
            either '+' or '-'
        flatten : bool
            intended to take the minimum over overlapping peaks instead of
            summing them; not implemented -- when True, matching peaks only
            print a notice and add nothing.

        Returns
        -------
        densities : pandas.Series
            one value per position of the region, starting at 0 and
            incremented by ``intervals.get_overlap`` for each same-strand
            peak.  Returned still all-zero when the peak lookup raises
            RuntimeError or yields None.
        """
        region = pybedtools.create_interval_from_list(
            [chrom, str(start), str(end), '.', '0', strand])
        series = pd.Series(data=0, index=range(len(region)))

        # Get all overlapping values
        try:
            overlapped_peaks = self.peaks.entries(chrom, start, end, strand)
        except RuntimeError as e:
            print(
                "weird entry (this can happen if the peak bb does not contain this chromosome, or if the region is invalid)"
                ": {}:{}-{}:{}".format(chrom, start, end, strand), e)
            return series

        if overlapped_peaks is None:
            return series

        for p in overlapped_peaks:
            bed_list = [chrom, str(p[0]), str(p[1])] + p[2].split('\t')
            if bed_list[5] != strand:
                # Peaks on the other strand are ignored.
                continue
            peak = pybedtools.create_interval_from_list(bed_list)
            if flatten:
                print('not implemented or important yet'
                      )  # TODO: implement flatten
                continue
            series += intervals.get_overlap(peak, region)
        return series
Example #35
0
def test_split_prox_dist_1():
    """
    A 10 nt intron split with distance 5 should be entirely proximal.

    Expects no 'dist' entries and exactly one 'prox' entry covering the
    whole intron, named 'proxintron5'.
    """
    print("Tests the core functionality of assigning proximal and distal "
          "intron spaces. Should not return any distal introns based on the "
          "specified distance.")
    intron_length = 10
    distance = 5

    intron = pybedtools.create_interval_from_list(
        ['chr1', '0', str(intron_length), 'intron', '0', '+'])
    regions = af.get_proxdist_from_intron(interval=intron, distance=distance)

    assert 'prox' in regions.keys()  # found a prox intron region.
    assert len(regions['dist']) == 0  # found no dist intron
    assert len(regions['prox']) == 1  # just found one prox intron

    only_prox = regions['prox'][0]
    expected = pybedtools.create_interval_from_list(
        ['chr1', '0', str(intron_length), 'proxintron5', '0', '+'])
    assert only_prox == expected
Example #36
0
def _iter_bed_dict(bed, val_index=None):
    """Yield one single-position interval per entry of a nested bed dict.

    ``bed`` maps ``(chrome, strand)`` to a mapping of ``pos -> val``. Each
    entry becomes an interval ``[pos, pos + 1)`` whose score field is
    ``_f2s(val)``. When ``val_index`` is given, ``val[val_index]`` is used
    as the score source instead of ``val`` itself.
    """
    for (chrome, strand), by_pos in bed.items():
        for pos, val in by_pos.items():
            score = val if val_index is None else val[val_index]
            yield pybedtools.create_interval_from_list(
                [chrome, pos, pos + 1, '.', _f2s(score), strand]
            )
Example #37
0
def test_interval_index():
    """
    supplement to the more general test in test_cbedtools.IntervalTest.testGetItemNegative

    Checks negative indexes and slices with negative bounds on intervals
    built from a 6-field and from a 9-field list.
    """
    iv = pybedtools.create_interval_from_list(
        'chr21   9719768 9721892 ALR/Alpha       1004    +'.split())
    assert iv[-1] == '+'
    assert iv[2:-1] == ['9721892', 'ALR/Alpha', '1004']

    iv = pybedtools.create_interval_from_list([
        'chr1', 'ucb', 'gene', '465', '805', '.', '+', '.',
        'ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805'
    ])
    # Call form of print: identical output on Python 2 for a single argument,
    # and valid syntax on Python 3 (the statement form is a SyntaxError there).
    print(iv[4:-3])
    assert iv[4:-3] == ['805', '.']
Example #38
0
def build_transcript_data_gtf_as_structure(species, pre_mrna):
    """
    Build a BedTool of "mRNA" records from the compiled AS_STRUCTURE file.

    species - prefix of the "<species>.AS.STRUCTURE.COMPILED.gff" data file,
              which is located through clipper.data_file (the file itself is
              generated from the AS_STRUCTURE_gtf ipython notebook)
    pre_mrna - if true uses pre mRNA length instead of mRNA length

    Returns a pybedtools.BedTool with one record per feature of the input
    file; each record carries gene_id, transcript_ids (only when present on
    the input feature) and effective_length attributes.
    """
    bedtoolintervals = []
    x = clipper.data_file(species + ".AS.STRUCTURE.COMPILED.gff")
    gtf_file = pybedtools.BedTool(x)
    for gene in gtf_file:
        effective_length = gene.attrs[
            'premrna_length'] if pre_mrna else gene.attrs['mrna_length']
        attrs = "gene_id=%s;" % (gene.attrs['gene_id'])
        if "transcript_ids" in gene.attrs:
            attrs += "transcript_ids=%s;" % (gene.attrs['transcript_ids'])
        attrs += "effective_length=%s" % (str(effective_length))

        # Build a real list of strings: on Python 3 map() returns a lazy
        # iterator, which is not a list of fields.
        fields = [
            str(field) for field in (
                gene['chrom'], "AS_STRUCTURE", "mRNA",
                gene.start + 1,
                gene.stop + 1, "0", gene['strand'], ".", attrs)
        ]
        bedtoolintervals.append(pybedtools.create_interval_from_list(fields))

    return pybedtools.BedTool(bedtoolintervals)
Example #39
0
 def test_plus_feature_minus_reads(self):
     # the plus-strand read from 10-14 should not appear -- so all zeros
     feature = pybedtools.create_interval_from_list(['chr2L', '0', '20', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, read_strand='-', fragment_size=5)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..19
     coverage = [0] * 20
     assert observed == list(enumerate(coverage))
Example #40
0
 def test_fragmentsize(self):
     # With fragment_size=10 the covered stretch is expected at 10..19.
     feature = pybedtools.create_interval_from_list(
         ['chr2L', '0', '25', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, fragment_size=10)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..24
     coverage = [0] * 10 + [1] * 10 + [0] * 5
     assert observed == list(enumerate(coverage))
Example #41
0
 def test_identity_bins(self):
     # Same number of bins as bp in the feature
     feature = pybedtools.create_interval_from_list(['chr2L', '0', '20', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, fragment_size=10, bins=20)
     # zip() is a lazy iterator on Python 3; np.array() of it would wrap the
     # iterator object instead of building a 2-column array, so make a list.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..19
     coverage = [0] * 10 + [1] * 10
     expected = list(enumerate(coverage))
     assert np.allclose(np.array(observed), np.array(expected))
Example #42
0
 def test_shiftwidth_and_fragmentsize(self):
     #
     # Reads on opposite strands shift oppositely...
     #
     #      |||||   original
     #
     #    +|||||    minus strand read, leftshift 1 and additional to left (3')
     #       |||||+ plus strand read, rightshift 1 and additional to right (3')
     #    111222111
     feature = pybedtools.create_interval_from_list(['chr2L', '60', '80', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, fragment_size=6, shift_width=1)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 60..79
     coverage = [0] * 8 + [1] * 3 + [2] * 3 + [1] * 3 + [0] * 3
     assert observed == list(enumerate(coverage, 60))
Example #43
0
    def test_plus_feature(self):
        # first make one where fragments are exactly as long as reads

        feature = pybedtools.create_interval_from_list(['chr2L', '0', '20', '.', '.', '+'])
        x, y = self.m.local_coverage(feature, fragment_size=5)
        # zip() is a lazy iterator on Python 3 and never compares equal to a
        # list, so materialize it once for both printing and the assertion.
        observed = list(zip(x, y))
        pp(observed)
        # expected (position, coverage) pairs for positions 0..19
        coverage = [0] * 10 + [1] * 5 + [0] * 5
        assert observed == list(enumerate(coverage))
Example #44
0
 def test_minus_feature(self):
     # minus strand flips the profile
     feature = pybedtools.create_interval_from_list(['chr2L', '0', '20', '.', '.', '-'])
     xm, ym = self.m.local_coverage(feature, fragment_size=5)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(xm, ym))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..19
     coverage = [0] * 5 + [1] * 5 + [0] * 10
     assert observed == list(enumerate(coverage))
def redefine_regions(df):
    """
    Split overlapping regions into consecutive, non-overlapping segments.

    Every distinct start/end coordinate found in ``df`` becomes a boundary,
    and each pair of neighbouring boundaries becomes one interval. The chrom
    and strand written on every interval are those of the last row of ``df``.

    :param df: pandas.Dataframe()
        The to_dataframe() result of bedtools cluster call
    :return BedTool(non-overlapping interval): pybedtools.BedTool()
        The BedTool of nonoverlapping intervals.
    """
    boundaries = set()
    for _, row in df.iterrows():
        chrom = row['chrom']
        strand = row['strand']
        boundaries.update((row['start'], row['end']))

    ordered = sorted(boundaries)
    # Pair each boundary with its successor to get adjacent segments.
    segments = [
        bt.create_interval_from_list(
            [chrom, str(left), str(right), 'name', '0', strand])
        for left, right in zip(ordered, ordered[1:])
    ]
    return bt.BedTool(segments)
Example #46
0
def output_bed_coords_from_fasta(fasta_fname, bed_fname):
    """
    Output event coordinates from a FASTA file into a BED
    format.

    Assumes the FASTA entry name consists of ';'-separated fields whose
    second field holds the GFF-style coordinates:

      >part_id;coords;entry_type

    Raises an Exception for an entry name without any ';'.

    Returns the summed length of all FASTA sequences read.
    """
    print("Converting FASTA %s to BED %s" % (fasta_fname,
                                             bed_fname))
    total_len = 0
    with open(bed_fname, "w") as bed_out:
        for fasta_name, fasta_seq in fastx_utils.get_fastx_entries(fasta_fname):
            # Assume FASTA entry coordinates are in GFF format.
            # Convert them to BED
            if ";" not in fasta_name:
                raise Exception("Malformed FASTA entry name: %s" % (fasta_name))
            gff_coords = fasta_name.split(";")[1]
            chrom, start, end, strand = parse_gff_coords(gff_coords)
            # Convert start to BED by subtracting one
            start = start - 1
            # A real list is required: map() is a lazy iterator on Python 3.
            bed_fields = [str(field) for field in (chrom, start, end,
                                                   gff_coords, "1",
                                                   strand)]
            bed_entry = pybedtools.create_interval_from_list(bed_fields)
            bed_out.write("%s" % (str(bed_entry)))
            # Accumulate total length of FASTA seqs
            total_len += len(fasta_seq)
    return total_len
Example #47
0
 def get_bedtool_iter():
     """Yield one GFF-style "gene" interval per row of the enclosing table.

     The start is txStart + 1 and the end is txEnd. Null name2, refseq and
     gene-symbol values are written as "NA" in the attributes field.
     """
     def _or_na(value):
         # Replace a null table cell by the "NA" placeholder.
         return "NA" if pandas.isnull(value) else value

     for _, row in table.iterrows():
         chrom = row["chrom"]
         start = int(row["txStart"]) + 1
         end = int(row["txEnd"])
         strand = row["strand"]
         # Annotation fields
         name2 = _or_na(row["name2"])
         refseq_id = _or_na(row["refseq"])
         gene_symbol = _or_na(row[gene_symbol_col])
         attributes = "ID=%s;ensg_id=%s;refseq_id=%s;gsymbol=%s;" % (
             name2, name2, refseq_id, gene_symbol)
         # Convert table to BedTool
         yield pybedtools.create_interval_from_list(
             [chrom, "genes_table", "gene", str(start), str(end),
              ".", strand, ".", attributes])
Example #48
0
def genotype_intervals(intervals_file=None, bam=None, workdir=None, window=GT_WINDOW, isize_mean=ISIZE_MEAN, isize_sd=ISIZE_SD, normal_frac_threshold=GT_NORMAL_FRAC):
    """Genotype every interval of a BED file against a BAM file.

    Each interval of ``intervals_file`` is parsed with parse_interval and
    genotyped with genotype_interval; the genotype is appended as an extra
    field and the results are written to "genotyped.bed" inside ``workdir``.

    :param intervals_file: path of the file holding the intervals
    :param bam: path of the BAM file, opened read-only with pysam
    :param workdir: output/temp directory, created when missing
    :param window: forwarded unchanged to genotype_interval
    :param isize_mean: with ``isize_sd`` defines the range
        [max(0, mean - 3 sd), mean + 3 sd] forwarded to genotype_interval
    :param isize_sd: see ``isize_mean``
    :param normal_frac_threshold: forwarded unchanged to genotype_interval
    :return: file name of the genotyped BedTool
    :raises Exception: any failure is logged, its traceback printed, and the
        exception re-raised
    """
    func_logger = logging.getLogger("%s-%s" % (genotype_intervals.__name__, multiprocessing.current_process()))

    if workdir and not os.path.isdir(workdir):
        os.makedirs(workdir)

    pybedtools.set_tempdir(workdir)

    genotyped_intervals = []
    start_time = time.time()

    isize_min = max(0, isize_mean - 3 * isize_sd)
    isize_max = isize_mean + 3 * isize_sd

    bam_handle = None
    try:
        bam_handle = pysam.Samfile(bam, "rb")
        for interval in pybedtools.BedTool(intervals_file):
            chrom, start, end, sv_type, svlen = parse_interval(interval)
            genotype = genotype_interval(chrom, start, end, sv_type, svlen, bam_handle, isize_min, isize_max, window, normal_frac_threshold)
            fields = interval.fields + [genotype]
            genotyped_intervals.append(pybedtools.create_interval_from_list(fields))
        bedtool = pybedtools.BedTool(genotyped_intervals).moveto(os.path.join(workdir, "genotyped.bed"))
    except Exception:
        func_logger.error('Caught exception in worker thread')

        # This prints the type, value, and stack trace of the
        # current exception being handled.
        traceback.print_exc()

        print()
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Release the BAM file handle on success and on failure alike.
        if bam_handle is not None:
            bam_handle.close()
    func_logger.info("Genotyped %d intervals in %g minutes" % (len(genotyped_intervals), (time.time() - start_time)/60.0))

    return bedtool.fn
Example #49
0
 def test_minus_feature(self):
     # minus strand flips the profile
     feature = pybedtools.create_interval_from_list(
         ['chr2L', '0', '20', '.', '.', '-'])
     xm, ym = self.m.local_coverage(feature, fragment_size=5)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(xm, ym))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..19
     coverage = [0] * 5 + [1] * 5 + [0] * 10
     assert observed == list(enumerate(coverage))
Example #50
0
def build_transcript_data_gtf_as_structure(species, pre_mrna):
    
    """
    
    Build a BedTool of "mRNA" records from the compiled AS_STRUCTURE file.
    
    species - prefix of the "<species>.AS.STRUCTURE.COMPILED.gff" data file,
              which is located through clipper.data_file (the file itself is
              generated from the AS_STRUCTURE_gtf ipython notebook)
    pre_mrna - if true uses pre mRNA length instead of mRNA length
    
    Returns a pybedtools.BedTool with one record per feature of the input
    file; each record carries gene_id, transcript_ids (only when present on
    the input feature) and effective_length attributes.
    
    """
    results = []
    x = clipper.data_file(species + ".AS.STRUCTURE.COMPILED.gff")
    gtf_file = pybedtools.BedTool(x)
    for gene in gtf_file:
        
        effective_length = gene.attrs['premrna_length'] if pre_mrna else gene.attrs['mrna_length']
        attrs = "gene_id=%s;" % (gene.attrs['gene_id'])
        if "transcript_ids" in gene.attrs:
            attrs += "transcript_ids=%s;" % (gene.attrs['transcript_ids']) 
        attrs += "effective_length=%s" % (str(effective_length)) 
        
        # Build a real list of strings: on Python 3 map() returns a lazy
        # iterator, which is not a list of fields.
        fields = [str(field) for field in (gene['chrom'],
                                           "AS_STRUCTURE",
                                           "mRNA",
                                           gene.start + 1,
                                           gene.stop + 1,
                                           "0",
                                           gene['strand'],
                                           ".",
                                           attrs)]
        results.append(pybedtools.create_interval_from_list(fields))
        
    return pybedtools.BedTool(results)
Example #51
0
    def __getitem__(self, key):
        """Yield BED intervals returned by the underlying file for ``key``.

        ``key`` supplies ``chrom``, ``start`` and ``end``; they are passed to
        ``self.fileobj.get``. This method is a generator: it yields nothing
        when the lookup returns None.

        Raises NotImplementedError (with conversion instructions) when the
        lookup fails with StrandFormatError.
        """
        chrom = key.chrom
        start = key.start
        stop = key.end
        try:
            bx_intervals = self.fileobj.get(chrom, start, stop)
        except StrandFormatError:
            raise NotImplementedError(dedent(
                """
                It appears you have a version of bx-python where bigBed files
                are temporarily unsupported due to recent changes in the
                bx-python dependency. In the meantime, please convert bigBed to
                BAM like this:

                    bigBedToBed {0} tmp.bed
                    bedtools bedtobam -i tmp.bed > {0}.bam

                and create a genomic signal object using this {0}.bam file.
                """.format(self.fn)))
        if bx_intervals is None:
            # End the generator with a plain return: raising StopIteration
            # inside a generator turns into RuntimeError under PEP 479.
            return
        for i in bx_intervals:
            interval = pybedtools.create_interval_from_list(i.fields)
            interval.file_type = 'bed'
            yield interval
Example #52
0
def primer_to_gff(name, primer, tag, seq_name, seq_start, strand, **kwargs):
    """Create a gff feature from 
    partially parsed primer3 results.

    ``primer`` is a dict of primer3 values; its 'position' entry is a
    "pos,length" string and its 'PENALTY' entry is used as the score. All
    other entries, ``name`` (as ID) and any extra keyword arguments become
    attributes of the feature. ``tag`` is written as the feature type.
    """
    # 'length' rather than 'len', so the builtin is not shadowed
    pos, length = map(int, primer['position'].split(','))
    
    # transfer the calculated values to attributes
    # skip the fields used elsewhere in gff
    at = pybedtools.Attributes(' ')
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    for k, v in primer.items():
        if k == 'position': continue
        # ';' separates attributes, so escape it inside values
        at[k] = v.replace(';', '%3B')
        
    at['ID'] = name
    
    # pass all optional params to attributes
    at.update({k:str(v) for k, v in kwargs.items()})
    
    # primer3 provides the coordinates of right primer with the 
    # pos pointing to the last base
    if strand == '-':
        start = seq_start + pos - length + 2
        end = seq_start + pos + 1
    else:
        start = seq_start + pos + 1
        end = seq_start + pos + length

    gflist = [seq_name, 'design-primers', tag, 
        str(start), str(end),
        primer['PENALTY'], strand, '.', str(at)]

    return pybedtools.create_interval_from_list(gflist)
Example #53
0
def truncator(feature):
    """
    Rebuild a feature as a three-field interval holding only its chrom,
    start and stop (BED3).
    """
    bed3_fields = [feature.chrom, str(feature.start), str(feature.stop)]
    return pybedtools.create_interval_from_list(bed3_fields)
Example #54
0
def create_bedtools(features, keys, by_transcript=False):
    """
    Turn a list of features into a BedTool of named six-field intervals.

    One interval is produced per value of the feature's gene (or
    transcript) id attribute, and that value is used as the interval name.
    The start is shifted by -1 relative to the feature start.

    :param features: list
        list of gffutils features (1-based) for which to convert
        to bedtool intervals
    :param keys: dict
        maps 'gene_id' / 'transcript_id' to the attribute name used by the
        annotation (helps translate different GTF/GFF nomenclatures)
    :param by_transcript: bool
        name intervals by keys['transcript_id'] instead of keys['gene_id']
    :return: pybedtools.BedTool of the created intervals
    """
    key = 'transcript_id' if by_transcript else 'gene_id'
    intervals = []
    progress = trange(len(features), desc='creating bedtools.')
    for feature in features:
        for name in feature.attributes[keys[key]]:
            fields = [
                feature.seqid,
                str(feature.start - 1),
                str(feature.end),
                name,
                '0',
                feature.strand,
            ]
            intervals.append(pybedtools.create_interval_from_list(fields))
        progress.update(1)
    return pybedtools.BedTool(intervals)
Example #55
0
 def test_shiftwidth_of_1_plus_only(self):
     # The plus-strand read should shift right by 1
     feature = pybedtools.create_interval_from_list(['chr2L', '0', '20', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, fragment_size=5, shift_width=1)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..19
     coverage = [0] * 11 + [1] * 5 + [0] * 4
     assert observed == list(enumerate(coverage))
Example #56
0
def merge_peaks_count(trimmed_windows, read_pos_weights):
    """Merge overlapping windows and sum the read weights inside each peak.

    ``trimmed_windows`` is an iterable of (start, end) pairs and
    ``read_pos_weights`` a sequence of (position, weight) pairs. The
    positions are searched with bisect, so they are presumably sorted
    ascending -- verify against the caller.

    Returns a list of (peak_start, peak_end, summed_weight) tuples.
    """
    # Window starts are shifted by -1 for the BED records and shifted back
    # by +1 when the merged intervals are read.
    window_intervals = [
        pybedtools.create_interval_from_list(
            ['chrFAKE', str(wstart - 1), str(wend)])
        for wstart, wend in trimmed_windows
    ]
    merged = pybedtools.BedTool(window_intervals).merge(stream=True)

    read_positions = [pos for (pos, w) in read_pos_weights]
    peaks = []
    for merged_interval in merged.features():
        pstart = merged_interval.start + 1
        pend = merged_interval.end

        # Index range of the reads with pstart <= position <= pend.
        first = bisect_left(read_positions, pstart)
        last = bisect_right(read_positions, pend)
        read_count = sum(weight for _, weight in read_pos_weights[first:last])

        peaks.append((pstart, pend, read_count))

    return peaks
Example #57
0
 def test_shift_of_0(self):
     # With shift_width=0 the expected coverage is 2 over positions 70..74.
     feature = pybedtools.create_interval_from_list(['chr2L', '60', '80', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, fragment_size=5, shift_width=0)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 60..79
     coverage = [0] * 10 + [2] * 5 + [0] * 5
     assert observed == list(enumerate(coverage, 60))
Example #58
0
 def test_fragmentsize(self):
     # With fragment_size=10 the covered stretch is expected at 10..19.
     feature = pybedtools.create_interval_from_list(['chr2L', '0', '25', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, fragment_size=10)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..24
     coverage = [0] * 10 + [1] * 10 + [0] * 5
     assert observed == list(enumerate(coverage))
Example #59
0
 def test_RNA_position_fail(self):
     
     """ Feeds RNA_position two inputs for which it is expected to give up, returning (None, None) """
     
     location_dict = {"ENSMUSG1" : {"strand" : "-",
                                    "regions" : [(100, 150), (25,50)]}}
     
     # Same record twice; only the last two columns differ (10 vs. 175)
     bed_lines = ("chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    10    10",
                  "chr1    50    60    ENSMUSG1_1_83;ENSMUSG1_6_83    0    -    175    175")
     
     for bed_line in bed_lines:
         tool = pybedtools.create_interval_from_list(bed_line.split())
         self.assertEqual(RNA_position(tool, location_dict), (None, None))
Example #60
0
 def test_plus_feature_minus_reads(self):
     # the plus-strand read from 10-14 should not appear -- so all zeros
     feature = pybedtools.create_interval_from_list(
         ['chr2L', '0', '20', '.', '.', '+'])
     x, y = self.m.local_coverage(feature, read_strand='-', fragment_size=5)
     # zip() is a lazy iterator on Python 3 and never compares equal to a
     # list, so materialize it once for both printing and the assertion.
     observed = list(zip(x, y))
     pp(observed)
     # expected (position, coverage) pairs for positions 0..19
     coverage = [0] * 20
     assert observed == list(enumerate(coverage))