コード例 #1
0
    def test_limits_downstream(self):
        """Expect no landmarks when a segment next to the junction is too short.

        Two region pairs are checked, one per strand; in both cases
        ``make_landmarks_file`` must produce an empty file.

        NOTE(review): the previous docstring said "upstream" while the test
        name says "downstream" -- confirm which segment is the limiting one.
        """
        # Positive strand, exon-intron junction.
        plus_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '350', '.', '+', '.', 'gene_name "A";'],
        ]
        plus_regions = make_file_from_list(plus_rows)
        plus_landmarks = rnamaps.make_landmarks_file(plus_regions, 'exon-intron')
        self.assertEqual(make_list_from_file(plus_landmarks), [])

        # Negative strand, intron-exon junction.
        minus_rows = [
            ['chr1', '.', 'CDS', '151', '200', '.', '-', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '351', '.', '-', '.', 'gene_name "A";'],
        ]
        minus_regions = make_file_from_list(minus_rows)
        minus_landmarks = rnamaps.make_landmarks_file(minus_regions, 'intron-exon')
        self.assertEqual(make_list_from_file(minus_landmarks), [])
コード例 #2
0
    def test_only_barcode5_1_mismatch(self):
        """Demultiplex with the first two ``barcodes5`` entries and one mismatch allowed."""
        demultiplex.run(
            self.fq_fname,
            self.adapter,
            self.barcodes5[:2],
            mismatches=1,
            out_dir=self.dir,
        )

        def read_demux(label):
            """Return the parsed content of the demultiplexed file for ``label``."""
            path = os.path.join(self.dir, 'demux_{}.fastq.gz'.format(label))
            return make_list_from_file(path)

        # File of the first barcode: a single entry is checked.
        first = read_demux(self.barcodes5[0])
        self.assertEqual(first[0], ['@header1:rbc:GGG/1'])
        self.assertEqual(first[1], [self.entry1.seq[6:-10]])
        self.assertEqual(first[3], [self.entry1.qual[6:-10]])

        # File of the second barcode: two consecutive entries are checked.
        second = read_demux(self.barcodes5[1])
        self.assertEqual(second[0], ['@header2:rbc:AA'])
        self.assertEqual(second[1], [self.entry2.seq[5:-10]])
        self.assertEqual(second[3], [self.entry2.qual[5:-10]])
        self.assertEqual(second[4], ['@header3:rbc:TT'])
        self.assertEqual(second[5], [self.entry3.seq[5:-10]])
        self.assertEqual(second[7], [self.entry3.qual[5:-10]])

        # Nothing is expected in the "nomatch5" file.
        unmatched = read_demux('nomatch5')
        self.assertEqual(unmatched, [])
コード例 #3
0
    def test_basic(self):
        """Check the single landmark produced for a CDS/intron pair on each strand."""
        # Positive strand, exon-intron junction.
        plus_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '351', '.', '+', '.', 'gene_name "A";'],
        ]
        plus_regions = make_file_from_list(plus_rows)
        plus_landmarks = rnamaps.make_landmarks_file(plus_regions, 'exon-intron')
        self.assertEqual(
            make_list_from_file(plus_landmarks),
            [['chr1', '200', '201', 'A', '.', '+']],
        )

        # Negative strand, intron-exon junction.
        minus_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '-', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '351', '.', '-', '.', 'gene_name "A";'],
        ]
        minus_regions = make_file_from_list(minus_rows)
        minus_landmarks = rnamaps.make_landmarks_file(minus_regions, 'intron-exon')
        self.assertEqual(
            make_list_from_file(minus_landmarks),
            [['chr1', '199', '200', 'A', '.', '-']],
        )
コード例 #4
0
ファイル: test_externals.py プロジェクト: ulelab/iCount-Mini
 def test_overwrite(self):
     """Check ``cutadapt.run`` with ``overwrite=True``.

     The entry at index ``[1][0]`` of ``self.reads`` is read before and after
     the run. The value read afterwards must equal the original one without
     the trailing adapter, and the return code must be 0.
     """
     original_seq = make_list_from_file(self.reads)[1][0]
     return_code = cutadapt.run(self.reads, self.adapter, overwrite=True)
     trimmed_seq = make_list_from_file(self.reads)[1][0]
     # Precondition: the untrimmed read really ends with the adapter.
     self.assertTrue(original_seq.endswith(self.adapter))
     self.assertEqual(original_seq[:-(len(self.adapter))], trimmed_seq)
     # Asserted once; the same check used to be repeated on two lines.
     self.assertEqual(return_code, 0)
コード例 #5
0
ファイル: test_externals.py プロジェクト: ulelab/iCount-Mini
 def test_simple(self):
     """Check ``cutadapt.run`` when the trimmed reads go to a separate file.

     The entry at index ``[1][0]`` of ``self.tmp`` must equal the matching
     entry of ``self.reads`` without the trailing adapter.
     """
     exit_status = cutadapt.run(
         self.reads,
         self.adapter,
         reads_trimmed=self.tmp,
         qual_trim=0,
         minimum_length=20,
     )
     untrimmed = make_list_from_file(self.reads)[1][0]
     trimmed = make_list_from_file(self.tmp)[1][0]
     adapter_len = len(self.adapter)
     # Precondition: the input read really ends with the adapter.
     self.assertTrue(untrimmed.endswith(self.adapter))
     self.assertEqual(untrimmed[:-adapter_len], trimmed)
     self.assertEqual(exit_status, 0)
コード例 #6
0
    def get_summary_reports(self, annotation, cross_links):
        """Run the summary pipeline on list inputs and return the parsed reports.

        Both inputs are written to temporary files, ``summary_templates`` and
        ``summary_reports`` are run with ``self.out_dir`` as output, and the
        type, subtype and gene summary files are returned (in that order) as
        lists parsed with a tab separator.
        """
        annotation_path = make_file_from_list(annotation)
        cross_links_path = make_file_from_list(cross_links)

        segment.summary_templates(annotation_path, self.out_dir)
        summary.summary_reports(annotation_path, cross_links_path, self.out_dir, self.out_dir)

        report_names = (
            segment.SUMMARY_TYPE,
            segment.SUMMARY_SUBTYPE,
            segment.SUMMARY_GENE,
        )
        return [
            make_list_from_file(os.path.join(self.out_dir, report_name), '\t')
            for report_name in report_names
        ]
コード例 #7
0
ファイル: test_region.py プロジェクト: yx-xu/iCount
    def test_templates1(self):
        """Check the type, subtype and gene templates written by ``summary_templates``."""
        out_dir = get_temp_dir()
        rows = [
            ['1', '.', 'intergenic', '1', '10', '.', '+', '.', 'gene_id ".";'],
            ['1', '.', 'UTR3', '11', '20', '.', '+', '.',
             'biotype "mRNA";gene_name "ABC";gene_id "G1";'],
            ['1', '.', 'intron', '21', '30', '.', '+', '.',
             'biotype "lncRNA";gene_name "ABC";gene_id "G1";'],
            ['1', '.', 'CDS', '31', '40', '.', '+', '.',
             'biotype "mRNA";gene_name "DEF";gene_id "G2";'],
            ['1', '.', 'intron', '41', '50', '.', '+', '.',
             'biotype "sRNA,lncRNA";gene_name "DEF"; gene_id "G2";'],
        ]
        segmentation = make_file_from_list(rows)
        region.summary_templates(segmentation, out_dir)

        # Template by region type.
        type_path = os.path.join(out_dir, region.TEMPLATE_TYPE)
        by_type = make_list_from_file(type_path, '\t')
        expected_type = [
            ['CDS', '10'],
            ['UTR3', '10'],
            ['intron', '20'],
            ['intergenic', '10'],
        ]
        self.assertEqual(by_type, expected_type)

        # Template by region type and biotype.
        subtype_path = os.path.join(out_dir, region.TEMPLATE_SUBTYPE)
        by_subtype = make_list_from_file(subtype_path, fields_separator='\t')
        expected_subtype = [
            ['CDS mRNA', '10'],
            ['UTR3 mRNA', '10'],
            ['intron lncRNA', '15'],
            ['intron sRNA', '5'],
            ['intergenic', '10'],
        ]
        self.assertEqual(by_subtype, expected_subtype)

        # Template by gene.
        gene_path = os.path.join(out_dir, region.TEMPLATE_GENE)
        by_gene = make_list_from_file(gene_path, fields_separator='\t')
        expected_gene = [
            ['.', '', '10'],
            ['G1', 'ABC', '20'],
            ['G2', 'DEF', '20'],
        ]
        self.assertEqual(by_gene, expected_gene)
コード例 #8
0
ファイル: test_rnamaps.py プロジェクト: dblenkus/iCount
    def test_explicit_whole_in(self):
        """
        Reads lie within a single transcript and cross the exon-intron
        landmark, so the assignment is explicit. Three reads are given at two
        different cross-link positions; one position has two distinct
        randomers.
        """
        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 140, 255, [(0, 50)], {'NH': 1}),
            ('name2:rbc:AAAA', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
            ('name2:rbc:CCCC', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['UTR5-intron', '-10', '1', '1'],
            ['UTR5-intron', '-8', '2', '2'],
        ]

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))
コード例 #9
0
ファイル: test_rnamaps.py プロジェクト: dblenkus/iCount
    def test_implicit_exons(self):
        """
        A single read lies within one transcript and one segment; that segment
        is of EXON_TYPE and is a "middle" segment of the transcript. The run
        uses ``implicit_handling='split'``.
        """
        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 205, 255, [(0, 20)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-UTR3', '-25', '0.25', '0'],
            ['CDS-intron', '-25', '0.25', '0'],
            ['UTR5-CDS', '5', '0.25', '0'],
            ['intron-CDS', '5', '0.25', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))
コード例 #10
0
ファイル: test_rnamaps.py プロジェクト: yuanchuntian/iCount
    def test_implicit_inter_tr(self):
        """
        A single read lies within one transcript and one segment, and that
        segment borders on intergenic (downstream). The run uses
        ``implicit_handling='split'``.
        """
        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 610, 255, [(0, 30)], {'NH': 1}),
        ]
        bam_spec = {'chromosomes': [('1', 1000)], 'segments': reads}
        bam = make_bam_file(bam_spec, rnd_seed=0)

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-CDS', '-40', '0.3333', '0'],
            ['CDS-intron', '-40', '0.3333', '0'],
            ['intergenic-CDS', '10', '0.3333', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))
コード例 #11
0
ファイル: test_segment.py プロジェクト: yx-xu/iCount
    def test_complement(self):
        """Check the '+' strand intergenic intervals returned by ``segment._complement``."""
        chrom_sizes = [
            ['1', '2000'],
            ['2', '1000'],
            ['MT', '500'],
        ]
        genome_file = make_file_from_list(chrom_sizes, bedtool=False)

        genes = list_to_intervals([
            ['1', '.', 'gene1', '200', '400', '.', '+', '.', '.'],
            ['1', '.', 'gene2', '300', '600', '.', '+', '.', '.'],
            ['1', '.', 'gene3', '200', '500', '.', '+', '.', '.'],
            ['2', '.', 'gene4', '100', '200', '.', '+', '.', '.'],
            ['2', '.', 'gene5', '100', '300', '.', '-', '.', '.'],
        ])

        complement_file = segment._complement(genes, genome_file, '+')
        complement = make_list_from_file(complement_file, fields_separator='\t')

        # Expected gaps as (chromosome, start, end); IDs are numbered in this order.
        gaps = [
            ('1', '1', '199'),
            ('1', '601', '2000'),
            ('2', '1', '99'),
            ('2', '201', '1000'),
            ('MT', '1', '500'),
        ]
        attrs = 'ID "inter%s"; gene_id "."; transcript_id ".";'
        expected = [
            [chrom, '.', 'intergenic', start, end, '.', '+', '.', attrs % 'P{:05d}'.format(idx)]
            for idx, (chrom, start, end) in enumerate(gaps)
        ]

        self.assertEqual(complement, expected)
コード例 #12
0
ファイル: test_clusters.py プロジェクト: matteofloris/iCount
    def test_clusters(self):
        """Run ``clusters.run`` with ``dist=3`` and ``slop=2`` and compare the output rows."""
        site_rows = [
            ['1', '1', '2', '.', '1', '+'],
            ['1', '2', '3', '.', '1', '+'],
            ['1', '3', '4', '.', '1', '+'],
            ['1', '4', '5', '.', '2', '+'],
            ['1', '4', '5', '.', '1', '-'],
            ['1', '5', '6', '.', '1', '+'],
            ['1', '6', '7', '.', '1', '-'],
            ['1', '7', '8', '.', '1', '-'],
            ['1', '10', '11', '.', '1', '+'],
            ['1', '11', '12', '.', '2', '+'],
            ['1', '12', '13', '.', '1', '+'],
        ]
        peak_rows = [
            ['1', '4', '5', 'cl1', '1', '+'],
            ['1', '4', '5', 'cl2', '1', '-'],
            ['1', '5', '6', 'cl3', '1', '+'],
            ['1', '11', '12', 'cl4', '2', '+'],
        ]
        fin_sites = make_file_from_list(site_rows)
        fin_peaks = make_file_from_list(peak_rows)
        fout_clusters = get_temp_file_name()

        clusters.run(fin_sites, fin_peaks, fout_clusters, dist=3, slop=2)

        expected = [
            ['1', '2', '6', 'cl1,cl3', '5', '+'],
            ['1', '4', '7', 'cl2', '2', '-'],
            ['1', '10', '13', 'cl4', '4', '+'],
        ]
        self.assertEqual(
            expected,
            make_list_from_file(fout_clusters, fields_separator='\t'),
        )
コード例 #13
0
ファイル: test_files.py プロジェクト: ulelab/iCount-Mini
 def test_bed2bedgraph_params(self):
     """
     Check that every custom track option ends up in the bedGraph header line.
     """
     track_options = {
         'name': 'Sample name',
         'description': 'A long and detailed description.',
         'visibility': 'full',
         'priority': 20,
         'color': '256,0,0',
         'alt_color': '0,256,0',
         'max_height_pixels': '100:50:0',
     }
     iCount.files.bedgraph.bed2bedgraph(self.bed, self.bedgraph, **track_options)

     header = (
         'track type=bedGraph name="Sample name" description="A long and detailed description."'
         ' visibility=full priority=20 color=256,0,0 altColor=0,256,0 maxHeightPixels=100:50:0'
     )
     expected = [
         [header],
         ['1', '4', '5', '+5'],
         ['1', '5', '6', '+1'],
         ['1', '5', '6', '-1'],
         ['2', '5', '6', '+3'],
     ]
     produced = make_list_from_file(self.bedgraph, fields_separator='\t')
     self.assertEqual(produced, expected)
コード例 #14
0
def template(cross_links, annotation, subtype='biotype',
             excluded_types=None):
    """
    Run ``annotate.annotate_cross_links`` on in-memory inputs.

    The inputs are given as lists and written to temporary files on the fly,
    so tests do not need to ship many small fixture files (or one large
    shared one, which would hurt test isolation).

    Parameters
    ----------
    cross_links : list
        List representation of cross-links file.
    annotation : list
        List representation of annotation file.
    subtype : str
        Passed through as the ``subtype`` argument.
    excluded_types : list or None
        Passed through as the ``excluded_types`` argument.

    Returns
    -------
    list
        List representation of the output file, parsed with a tab separator.

    """
    xlinks_path = make_file_from_list(cross_links, extension='bed.gz')
    annotation_path = make_file_from_list(annotation, extension='gtf.gz')
    result_path = get_temp_file_name(extension='bed.gz')

    annotate.annotate_cross_links(
        annotation_path,
        xlinks_path,
        result_path,
        subtype=subtype,
        excluded_types=excluded_types,
    )
    return make_list_from_file(result_path, fields_separator='\t')
コード例 #15
0
ファイル: test_rnamaps.py プロジェクト: dblenkus/iCount
    def test_negative_strand(self):
        """
        Run with an annotation whose strand column is forced to '-' and a
        single read with flag 16, using ``implicit_handling='split'``.

        NOTE(review): the previous docstring described a segment bordering on
        intergenic (downstream) -- confirm that this is the intended scenario.
        """
        # Replace the strand (column index 6) of every annotation row with '-'.
        gtf_neg_data = []
        for row in intervals_to_list(self.gtf_data):
            gtf_neg_data.append(row[:6] + ['-'] + row[7:])
        gtf_neg = make_file_from_list(gtf_neg_data)

        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 16, 0, 549, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '20', '0.5', '0'],
            ['intergenic-CDS', '-80', '0.5', '0'],
        ]

        rnamaps.run(
            bam, gtf_neg, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))
コード例 #16
0
ファイル: test_rnamaps.py プロジェクト: yuanchuntian/iCount
    def test_normalisation(self):
        """Check the rows written by ``rnamaps.make_normalization`` for ``self.gtf``."""
        norm_file = get_temp_file_name(extension='txt')
        rnamaps.make_normalization(self.gtf, norm_file)

        # (RNAmap type, first distance, last distance); every distance in the
        # inclusive range is expected once, each with a segment count of '1'.
        # NOTE(review): the 'integrenic' spelling is kept exactly as in the
        # original expectation -- confirm it matches what the library emits.
        distance_ranges = [
            ('CDS-UTR3', -1, 1),
            ('CDS-intron', -1, 2),
            ('integrenic-CDS', -2, 0),
            ('intron-UTR3', -3, 1),
            ('intron-ncRNA', -1, 0),
            ('ncRNA-integrenic', -1, 1),
            ('ncRNA-intron', -2, 0),
            ('ncRNA-ncRNA', -2, 0),
        ]
        expected = [['RNAmap_type', 'distance', 'segments']]
        for map_type, first, last in distance_ranges:
            for distance in range(first, last + 1):
                expected.append([map_type, str(distance), '1'])

        self.assertEqual(expected, make_list_from_file(norm_file))
コード例 #17
0
ファイル: test_rnamaps.py プロジェクト: dblenkus/iCount
    def test_implicit_whole_in(self):
        """
        Reads lie within a single transcript and a single segment, which is
        the "middle" segment of the transcript. Three reads are given at two
        different cross-link positions; one position has two distinct
        randomers.
        """
        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 160, 255, [(0, 30)], {'NH': 1}),
            ('name2:rbc:CCCC', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
            ('name2:rbc:GGGG', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['UTR5-intron', '10', '1', '0'],
            ['UTR5-intron', '13', '2', '0'],
        ]

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))
コード例 #18
0
ファイル: test_rnamaps.py プロジェクト: dblenkus/iCount
    def test_cross_transcript_read(self):
        """
        A read lying half in a transcript region and half in intergenic is
        expected to be reported in the ``self.cross_tr`` file.
        """
        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 235, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['chrom', 'strand', 'xlink', 'second-start', 'end-position', 'read_len']
        expected = [
            header,
            ['1', '+', '234', '0', '284', '50'],
        ]

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.cross_tr))
コード例 #19
0
ファイル: test_rnamaps.py プロジェクト: dblenkus/iCount
    def test_implicit_intergenic(self):
        """
        A single read lies entirely in intergenic. The run uses
        ``implicit_handling='split'``.
        """
        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 530, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '30', '0.5', '0'],
            ['intergenic-CDS', '-70', '0.5', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))
コード例 #20
0
ファイル: test_rnamaps.py プロジェクト: dblenkus/iCount
    def test_explicit_intergenic_right(self):
        """
        A single read lies half in a transcript region and half in intergenic;
        one explicit CDS-intergenic row is expected.
        """
        # Each read: (qname, flag, refname, pos, mapq, cigar, tags)
        reads = [
            ('name2:rbc:CCCC', 0, 0, 480, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['RNAmap', 'type', 'position', 'all', 'explicit']
        expected = [
            header,
            ['CDS-intergenic', '-20', '1', '1'],
        ]

        rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))
コード例 #21
0
    def test_run(self):
        """Run ``peaks.run`` on a tiny annotation and compare both the peaks and scores output."""
        annotation_rows = [
            ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_name "A"; gene_id "1";'],
            ['1', '.', 'transcript', '10', '20', '.', '+', '.', 'gene_name "B"; gene_id "1";'],
            ['2', '.', 'CDS', '10', '20', '.', '+', '.', 'gene_name "C"; gene_id "1";'],
        ]
        site_rows = [
            ['1', '14', '15', '.', '3', '+'],
            ['1', '16', '17', '.', '5', '+'],
            ['2', '16', '17', '.', '5', '+'],
        ]
        fin_annotation = make_file_from_list(annotation_rows)
        fin_sites = make_file_from_list(site_rows)

        fout_peaks = get_temp_file_name(extension='.bed.gz')
        fout_scores = get_temp_file_name(extension='.tsv.gz')

        peaks.run(fin_annotation, fin_sites, fout_peaks, scores=fout_scores)

        out_peaks = make_list_from_file(fout_peaks, fields_separator='\t')
        # The first row of the scores file is a header and is not compared.
        out_scores = make_list_from_file(fout_scores, fields_separator='\t')[1:]

        self.assertEqual(out_peaks, [
            ['1', '14', '15', 'A-1', '3', '+'],
            ['1', '16', '17', 'A-1', '5', '+'],
        ])
        self.assertEqual(out_scores, [
            ['1', '14', '+', 'A', '1', '3', '8', '0.036198'],
            ['1', '16', '+', 'A', '1', '5', '8', '0.036198'],
            ['2', '16', '+', 'not_annotated', 'not_annotated', '5', 'not_calculated', '1'],
        ])
コード例 #22
0
ファイル: test_region.py プロジェクト: yx-xu/iCount
    def test_basic(self):
        """Check the intervals returned by ``region.construct_borders`` for overlapping segments."""
        # Every row carries the same attribute column.
        attrs = 'biotype "A"; gene_name "X";'
        segmentation = [
            # First group of segments on the '+' strand.
            ['1', '.', 'ncRNA', '1', '10', '.', '+', '.', attrs],
            ['1', '.', 'intron', '11', '20', '.', '+', '.', attrs],
            ['1', '.', 'CDS', '21', '30', '.', '+', '.', attrs],
            ['1', '.', 'UTR3', '31', '40', '.', '+', '.', attrs],
            # Second group on the '+' strand, overlapping the first one.
            ['1', '.', 'CDS', '5', '14', '.', '+', '.', attrs],
            ['1', '.', 'intron', '15', '24', '.', '+', '.', attrs],
            ['1', '.', 'CDS', '25', '34', '.', '+', '.', attrs],
            # One segment on the '-' strand.
            ['1', '.', 'CDS', '3', '32', '.', '-', '.', attrs],
        ]
        expected = [
            ['1', '0', '4', '.', '.', '+'],
            ['1', '4', '10', '.', '.', '+'],
            ['1', '10', '14', '.', '.', '+'],
            ['1', '14', '20', '.', '.', '+'],
            ['1', '20', '24', '.', '.', '+'],
            ['1', '24', '30', '.', '.', '+'],
            ['1', '30', '34', '.', '.', '+'],
            ['1', '34', '40', '.', '.', '+'],
            ['1', '2', '32', '.', '.', '-'],
        ]

        def border_key(row):
            """Order rows by chromosome, strand, numeric start and numeric stop."""
            return (row[0], row[-1], int(row[1]), int(row[2]))

        segmentation_file = make_file_from_list(segmentation)
        borders_file = region.construct_borders(BedTool(segmentation_file))
        results = make_list_from_file(borders_file, fields_separator='\t')
        self.assertEqual(expected, sorted(results, key=border_key))
コード例 #23
0
    def test_all_good(self):
        """Run ``segment.get_regions`` on one gene with a single two-exon transcript."""
        gene_attr = 'gene_id "G2";'
        tr_attr = 'gene_id "G2"; transcript_id "T3";'
        gtf_in_data = list_to_intervals([
            ['1', '.', 'gene', '400', '500', '.', '+', '.', gene_attr],
            ['1', '.', 'transcript', '400', '500', '.', '+', '.', tr_attr],
            ['1', '.', 'exon', '400', '430', '.', '+', '.', tr_attr + ' exon_number "1"'],
            ['1', '.', 'CDS', '410', '430', '.', '+', '.', tr_attr],
            ['1', '.', 'exon', '470', '500', '.', '+', '.', tr_attr + ' exon_number "2"'],
            ['1', '.', 'CDS', '470', '490', '.', '+', '.', tr_attr],
        ])
        gtf_in_file = make_file_from_list(intervals_to_list(gtf_in_data))

        # Only the name of the (closed) temporary output file is needed.
        with tempfile.NamedTemporaryFile(mode='w+', delete=False) as gtf_out:
            gtf_out_name = gtf_out.name

        chrom_sizes = [
            ['1', '2000'],
            ['MT', '500'],
        ]
        genome_file = make_file_from_list(chrom_sizes, bedtool=False)

        regions_file = segment.get_regions(gtf_in_file, gtf_out_name, genome_file)
        regions_rows = make_list_from_file(regions_file, fields_separator='\t')
        gtf_out_data = list_to_intervals(regions_rows)

        # Attribute columns that occur more than once in the expected output.
        inter_attr = 'gene_id "."; transcript_id ".";'
        seg_attr = 'gene_id "G2";transcript_id "T3"; biotype ".";'
        expected = list_to_intervals([
            ['1', '.', 'intergenic', '1', '399', '.', '+', '.', inter_attr],
            ['1', '.', 'intergenic', '1', '2000', '.', '-', '.', inter_attr],
            ['1', '.', 'transcript', '400', '500', '.', '+', '.', seg_attr],
            ['1', '.', 'UTR5', '400', '409', '.', '+', '.',
             'gene_id "G2";exon_number "1";transcript_id "T3"; biotype ".";'],
            ['1', '.', 'gene', '400', '500', '.', '+', '.',
             'gene_id "G2"; biotype "[.]";'],
            ['1', '.', 'CDS', '410', '430', '.', '+', '.', seg_attr],
            ['1', '.', 'intron', '431', '469', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3"; biotype ".";'],
            ['1', '.', 'CDS', '470', '490', '.', '+', '.', seg_attr],
            ['1', '.', 'UTR3', '491', '500', '.', '+', '.',
             'gene_id "G2";exon_number "2";transcript_id "T3"; biotype ".";'],
            ['1', '.', 'intergenic', '501', '2000', '.', '+', '.', inter_attr],
            ['MT', '.', 'intergenic', '1', '500', '.', '+', '.', inter_attr],
            ['MT', '.', 'intergenic', '1', '500', '.', '-', '.', inter_attr],
        ])

        self.assertEqual(expected, gtf_out_data)
コード例 #24
0
ファイル: test_bed.py プロジェクト: nebo56ucl/iCount
def merge_bed_wrapper(data):
    """
    Run ``merge_bed`` on list representations of input files.

    Each element of ``data`` is written to its own temporary file with
    ``make_file_from_list``; these files are then merged into a fresh
    temporary output file.

    Parameters
    ----------
    data : list
        One list representation per input file.

    Returns
    -------
    list
        Content of the file whose name ``merge_bed`` returns, parsed with a
        tab as the field separator.

    """
    files = [make_file_from_list(file_) for file_ in data]
    # Only the name is needed. Close the handle explicitly so it is not left
    # open while ``merge_bed`` works on the same path.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        out_file = handle.name
    return make_list_from_file(merge_bed(out_file, files),
                               fields_separator='\t')
コード例 #25
0
def merge_bed_wrapper(data):
    """
    Run ``merge_bed`` on list representations of input files.

    Every element of ``data`` is written to a temporary file, the files are
    merged into a new temporary output file, and the content of that output
    file is returned as a list parsed with a tab separator.
    """
    input_files = [make_file_from_list(rows) for rows in data]
    merged_path = get_temp_file_name()
    merge_bed(merged_path, input_files)
    return make_list_from_file(merged_path, fields_separator='\t')
コード例 #26
0
ファイル: test_files.py プロジェクト: bakerwm/iCount
 def test_bed2bedgraph(self):
     """Check the bedGraph written by ``bed2bedgraph`` when no track options are given."""
     iCount.files.bedgraph.bed2bedgraph(self.bed, self.bedgraph)

     header = 'track type=bedGraph name="User Track" description="User Supplied Track"'
     expected = [
         [header],
         ['1', '4', '5', '+5'],
         ['1', '5', '6', '+1'],
         ['1', '5', '6', '-1'],
         ['2', '5', '6', '+3'],
     ]
     produced = make_list_from_file(self.bedgraph, fields_separator='\t')
     self.assertEqual(produced, expected)
コード例 #27
0
ファイル: test_region.py プロジェクト: yx-xu/iCount
    def test_basic(self):
        """Check the rows written by ``region.merge_regions``.

        The first two input rows are expected to come out as one row spanning
        1-20; the remaining three rows are expected unchanged. The first eight
        columns are compared directly, while the attribute column is compared
        after sorting its ``;``-separated parts.
        """
        # Nine-column rows; the last column holds the attributes.
        nonmerged = make_file_from_list([
            [
                '1', '.', 'UTR3', '1', '10', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '11', '20', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '21', '30', '.', '+', '.',
                'biotype "lncRNA";gene_id "id2";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '-', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
        ])

        expected = [
            [
                '1', '.', 'UTR3', '1', '20', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '21', '30', '.', '+', '.',
                'biotype "lncRNA";gene_id "id2";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '-', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
        ]

        region.merge_regions(nonmerged, self.tmp)
        results = make_list_from_file(self.tmp, fields_separator='\t')
        # ``zip`` stops at the shorter sequence, so without this check an
        # empty or truncated output would pass the loop below unnoticed.
        self.assertEqual(len(results), len(expected))
        # Since order of attrs can be arbitrary, equality checks are more complex:
        for res, exp in zip(results, expected):
            self.assertEqual(res[:8], exp[:8])
            self.assertEqual(
                ';'.join(sorted(res[8].split(';'))),
                ';'.join(sorted(exp[8].split(';'))),
            )
コード例 #28
0
ファイル: test_landmark.py プロジェクト: yx-xu/iCount
    def test_basic(self):
        """Check the two landmarks written by ``landmark.make_landmarks`` for CDS-intron-CDS."""
        region_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '400', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'CDS', '401', '600', '.', '+', '.', 'gene_name "A";'],
        ]
        regions = make_file_from_list(region_rows)
        landmarks = get_temp_file_name(extension='bed')

        landmark.make_landmarks(regions, landmarks)

        expected = [
            ['chr1', '200', '201', 'exon-intron;A', '.', '+'],
            ['chr1', '400', '401', 'intron-exon;A', '.', '+'],
        ]
        self.assertEqual(make_list_from_file(landmarks), expected)
コード例 #29
0
ファイル: test_files.py プロジェクト: bakerwm/iCount
 def test_fastq_file_write(self):
     """Write two entries through ``FastqFile`` and read the file back line by line."""
     records = [
         ['@header1', 'AAA', '+', 'FFF'],
         ['@header2', 'AAAA', '+', 'FFFF'],
     ]
     fq_file_name = get_temp_file_name(extension='fq.gz')
     writer = iCount.files.fastq.FastqFile(fq_file_name, 'wt')
     for record in records:
         writer.write(iCount.files.fastq.FastqEntry(*record))
     writer.close()

     # Every field of every record is expected on its own line, in order.
     expected = [[field] for record in records for field in record]
     self.assertEqual(make_list_from_file(fq_file_name), expected)
コード例 #30
0
ファイル: test_files.py プロジェクト: bakerwm/iCount
    def test_bed2bedgraph_params(self):
        """
        Check the header line when ``name`` and ``description`` are customised.

        The given ``name`` is longer than 15 characters; only its first 15
        characters are expected in the header.
        """
        iCount.files.bedgraph.bed2bedgraph(
            self.bed,
            self.bedgraph,
            name='Longer than 15 chars.',
            description='Custom text.',
        )

        header = 'track type=bedGraph name="Longer than 15 " description="Custom text."'
        expected = [
            [header],
            ['1', '4', '5', '+5'],
            ['1', '5', '6', '+1'],
            ['1', '5', '6', '-1'],
            ['2', '5', '6', '+3'],
        ]
        produced = make_list_from_file(self.bedgraph, fields_separator='\t')
        self.assertEqual(produced, expected)