Esempio n. 1
0
 def test_write_si_pos_from_exons(self):
     # Feed the same three CDS records on both strands (one transcript per
     # strand) to write_si_pos_from_exons and compare the BED file it writes
     # with the stored expected file.
     attrs_plus = 'gene_id "FBgn0031307"; gene_symbol "MFS3"; transcript_id "FBtr0077958"; transcript_symbol "MFS3-RA";'
     attrs_minus = 'gene_id "FBgn0031307"; gene_symbol "MFS3"; transcript_id "FBtr0077959"; transcript_symbol "MFS3-RA";'
     # (start, end, phase) of every CDS record; shared by both transcripts.
     cds_spans = (
         (1143371, 1143410, '0'),
         (1145444, 1146472, '2'),
         (1146539, 1146825, '2'),
     )
     exons = {}
     for transcript_id, strand, attrs in (
             ("FBtr0077958", '+', attrs_plus),
             ("FBtr0077959", '-', attrs_minus),
     ):
         exons[transcript_id] = [
             ['2L', 'FlyBase', 'CDS', start, end, '.', strand, phase, attrs]
             for start, end, phase in cds_spans
         ]
     observed_path = "tests/write_si_pos_from_exons_observed.bed"
     expected_path = "tests/write_si_pos_from_exons_expected.bed"
     write_si_pos_from_exons(exons, observed_path, add_chr=False)
     observed = rw.read_as_string(observed_path)
     expected = rw.read_as_string(expected_path)
     self.assertEqual(expected, observed)
Esempio n. 2
0
 def test_get_spliced_reads_exon_filters_exon_start_window(self):
     # Run get_spliced_reads with an exon dictionary and the
     # filter_start/filter_end arguments, then compare the concatenated
     # spliced + unspliced output with the concatenated expected files.
     reads_path = "tests/get_spliced_reads_exon_start_window_input_reads.bed"
     junctions_path = "tests/get_spliced_reads_input_junctions.bed"
     spliced_out = "tests/get_spliced_reads_exon_filters_exon_start_window_observed_spliced.bed"
     unspliced_out = "tests/get_spliced_reads_exon_filters_exon_start_window_observed_unspliced.bed"
     spliced_ref = "tests/get_spliced_reads_exon_filters_exon_start_window_expected_spliced.bed"
     unspliced_ref = "tests/get_spliced_reads_exon_start_window_expected_unspliced.bed"
     # The same four exons are used for both transcripts; the minus-strand
     # transcript lists them in descending coordinate order.
     exon_spans = [(2, 6), (11, 13), (17, 19), (22, 26)]
     exons = {
         "FBtr0078168": [
             ["chr2L", "havana", "exon", start, end, "FBtr0078168", ".", "+"]
             for start, end in exon_spans
         ],
         "FBtr0078169": [
             ["chr2L", "havana", "exon", start, end, "FBtr0078169", ".", "-"]
             for start, end in reversed(exon_spans)
         ],
     }
     get_spliced_reads(
         reads_path,
         junctions_path,
         spliced_out,
         unspliced_out,
         exons=exons,
         overhang=0,
         filter_start=1,
         filter_end=3,
     )
     expected = (rw.read_as_string(spliced_ref)
                 + rw.read_as_string(unspliced_ref))
     observed = (rw.read_as_string(spliced_out)
                 + rw.read_as_string(unspliced_out))
     self.assertEqual(expected, observed)
Esempio n. 3
0
 def test_snr_bed(self):
     # Run snr_bed with default options and compare the file it writes
     # with the stored expected BED file.
     source_bed = "tests/snr_bed_input.bed"
     result_bed = "tests/snr_bed_observed.bed"
     snr_bed(source_bed, result_bed)
     expected = rw.read_as_string("tests/snr_bed_expected.bed")
     observed = rw.read_as_string(result_bed)
     self.assertEqual(expected, observed)
Esempio n. 4
0
 def test_snr_bed_fiveprime(self):
     # Same check as the default snr_bed test, but with five_prime_most=True
     # and its own input/expected fixtures.
     source_bed = "tests/snr_bed_fiveprime_input.bed"
     result_bed = "tests/snr_bed_fiveprime_observed.bed"
     snr_bed(source_bed, result_bed, five_prime_most=True)
     expected = rw.read_as_string("tests/snr_bed_fiveprime_expected.bed")
     observed = rw.read_as_string(result_bed)
     self.assertEqual(expected, observed)
Esempio n. 5
0
 def test_get_transcripts(self):
     # Run get_transcripts on the GTF fixture and compare the BED file it
     # writes with the stored expected file.
     gtf_path = "tests/get_transcripts_input.gtf"
     observed_path = "tests/get_transcripts_observed.bed"
     expected_path = "tests/get_transcripts_expected.bed"
     get_transcripts(gtf_path, observed_path)
     expected = rw.read_as_string(expected_path)
     observed = rw.read_as_string(observed_path)
     self.assertEqual(expected, observed)
Esempio n. 6
0
 def test_density_per_transcript(self):
     # Run density_per_transcript on the exon GTF and polII BED fixtures
     # and compare the text file it writes with the stored expected file.
     exon_gtf = "tests/density_per_transcript_input_exons.gtf"
     polII_reads = "tests/density_per_transcript_input_polII.bed"
     observed_path = "tests/density_per_transcript_observed.txt"
     expected_path = "tests/density_per_transcript_expected.txt"
     density_per_transcript(exon_gtf, polII_reads, observed_path)
     expected = rw.read_as_string(expected_path)
     observed = rw.read_as_string(observed_path)
     self.assertEqual(expected, observed)
Esempio n. 7
0
 def test_merge_bed(self):
     # Run merge_bed with a distance argument of 2 and compare the file it
     # writes with the stored expected BED file.
     source_bed = "tests/merge_bed_input.bed"
     result_bed = "tests/merge_bed_output.bed"
     reference_bed = "tests/merge_bed_expected.bed"
     merge_distance = 2
     merge_bed(source_bed, result_bed, merge_distance)
     expected = rw.read_as_string(reference_bed)
     observed = rw.read_as_string(result_bed)
     self.assertEqual(expected, observed)
Esempio n. 8
0
 def test_extend_intervals(self):
     # Run extend_intervals with left_shift=4 and right_shift=5 and compare
     # the file it writes with the stored expected BED file.
     source_bed = "tests/extend_intervals_input.bed"
     expected_path = "tests/extend_intervals_expected.bed"
     observed_path = "tests/extend_intervals_observed.bed"
     upstream, downstream = 4, 5
     extend_intervals(source_bed, observed_path, upstream, downstream)
     expected = rw.read_as_string(expected_path)
     observed = rw.read_as_string(observed_path)
     self.assertEqual(expected, observed)
Esempio n. 9
0
 def test_extend_intervals_three_prime(self):
     # Run extend_intervals with three_prime=True (left_shift=4,
     # right_shift=2) and compare the file it writes with the stored
     # expected BED file.
     source_bed = "tests/extend_intervals_input.bed"
     expected_path = "tests/extend_intervals_three_prime_expected.bed"
     observed_path = "tests/extend_intervals_three_prime_observed.bed"
     upstream, downstream = 4, 2
     extend_intervals(
         source_bed, observed_path, upstream, downstream, three_prime=True
     )
     expected = rw.read_as_string(expected_path)
     observed = rw.read_as_string(observed_path)
     self.assertEqual(expected, observed)
Esempio n. 10
0
    def test_extract_3ss(self):
        # Build exon records for one plus-strand and one minus-strand entry,
        # run extract_3ss on them and compare the BED file it writes with
        # the stored expected file.
        plus_attrs = "gene_id \"FBgn1\"; gene_symbol \"drl\"; transcript_id \"FBtr33\"; transcript_symbol \"drl-RA\";"
        minus_attrs = "gene_id \"FBgn2\"; gene_symbol \"drl\"; transcript_id \"FBtr3\"; transcript_symbol \"drl-RA\";"
        # (start, end) of each exon, in the order the records are listed.
        plus_spans = ((1, 4), (9, 10), (12, 14), (17, 18), (21, 23))
        minus_spans = ((22, 23), (18, 19), (13, 14), (4, 7))

        exons = {
            "FBgn1": [
                ["2L", "FlyBase", "exon", start, end, ".", "+", ".", plus_attrs]
                for start, end in plus_spans
            ],
            "FBgn2": [
                ["2L", "FlyBase", "exon", start, end, ".", "-", ".", minus_attrs]
                for start, end in minus_spans
            ],
        }

        observed_path = "tests/extract_3ss_observed.bed"
        expected = rw.read_as_string("tests/extract_3ss_expected.bed")
        extract_3ss(exons, observed_path)
        observed = rw.read_as_string(observed_path)
        self.assertEqual(expected, observed)
Esempio n. 11
0
def extend_intervals(input_file, output_file, left_shift, right_shift, remove_chr=False, add_chr=False, names_file=None, three_prime=False):
    """
    Given a BED file, make a new BED file with intervals that start _left_shift_ nt upstream
    of the interval starts in the original file and end _right_shift_ nt downstream of them.
    Note that for intervals on the negative strand, right and left will be
    reversed (the window is built around the interval end coordinate).

    Only the first six columns of each record are written. Records with fewer
    than six columns are skipped. Records whose new start coordinate would be
    negative are dropped and counted; the count is printed at the end.

    :param input_file: input BED file (tab-delimited)
    :param output_file: output BED file (tab-delimited)
    :param left_shift: distance between old and new interval start
    :param right_shift: distance between old interval start and new interval end
    :param remove_chr: if True, remove a leading "chr" prefix from the chromosome name in the output
    :param add_chr: if True, prefix "chr" to the chromosome name in the output
    (applied after remove_chr when both are set)
    :param names_file: if specified, then reads will only be processed
    if the ID (column 4) is in the specified file; each line of the names file
    lets one record with that ID through
    :param three_prime: if True, extend around interval ends instead
    :return: None
    """
    from collections import Counter

    remove_counter = 0
    # Multiset of IDs that may still be processed. A Counter keeps the
    # "one record per listed name" behaviour of removing names from a list
    # while making each lookup O(1) instead of O(number of names).
    remaining_names = Counter()
    if names_file:
        remaining_names = Counter(rw.read_as_string(names_file).split("\n"))
    # _plus_ is the strand for which the window is anchored on column 2
    # (the interval start). For three-prime mode the roles of the strands
    # and of the two shifts are exchanged.
    plus = "+"
    if three_prime:
        plus = "-"
        left_shift, right_shift = right_shift, left_shift
    # newline="" is what the csv module asks for, so that the writer's own
    # line terminator is not translated a second time by the file object.
    with open(input_file, newline="") as bed, open(output_file, "w", newline="") as out_bed:
        reader = csv.reader(bed, delimiter="\t")
        writer = csv.writer(out_bed, delimiter="\t")
        for line in reader:
            if names_file:
                # Rows too short to carry an ID (e.g. blank lines) cannot match.
                if len(line) < 4 or remaining_names[line[3]] <= 0:
                    continue
                remaining_names[line[3]] -= 1
            if len(line) < 6:
                # No strand column: nothing can be written for this record.
                continue
            template = line[:6]
            # make a BED interval starting _left_shift_ nt before the anchor
            # coordinate and ending _right_shift_ nt after it (mirrored on
            # the other strand).
            if line[5] == plus:
                template[1] = int(line[1]) - left_shift
                template[2] = int(line[1]) + right_shift
            else:
                template[1] = int(line[2]) - right_shift
                template[2] = int(line[2]) + left_shift
            # ignore cases where the read is so close to the start of the
            # chromosome that you end up with a negative coordinate
            if template[1] < 0:
                remove_counter += 1
                continue
            if remove_chr and template[0].startswith("chr"):
                # Slice the prefix off: str.lstrip("chr") would strip any
                # run of the characters c/h/r, turning "chrrDNA" into "DNA".
                template[0] = template[0][len("chr"):]
            if add_chr:
                template[0] = "chr{0}".format(template[0])
            writer.writerow(template)
    print("Removed because would have exceeded the chromosome: {0}.".format(remove_counter))
Esempio n. 12
0
 def test_get_spliced_reads_no_filters(self):
     # Run get_spliced_reads with exons=False and overhang=0, then compare
     # the concatenated spliced + unspliced output with the concatenated
     # expected files.
     reads_path = "tests/get_spliced_reads_input_reads.bed"
     junctions_path = "tests/get_spliced_reads_input_junctions.bed"
     spliced_out = "tests/get_spliced_reads_observed_spliced.bed"
     unspliced_out = "tests/get_spliced_reads_observed_unspliced.bed"
     spliced_ref = "tests/get_spliced_reads_expected_spliced.bed"
     unspliced_ref = "tests/get_spliced_reads_expected_unspliced.bed"
     get_spliced_reads(
         reads_path,
         junctions_path,
         spliced_out,
         unspliced_out,
         exons=False,
         overhang=0,
     )
     expected = (rw.read_as_string(spliced_ref)
                 + rw.read_as_string(unspliced_ref))
     observed = (rw.read_as_string(spliced_out)
                 + rw.read_as_string(unspliced_out))
     self.assertEqual(expected, observed)