Python make_list_from_file Examples, iCount.tests.utils.make_list_from_file Python Examples

Example #1

0

Show file

    def test_limits_downstream(self):
        """Landmarks with too short upstream segment should not be used.

        Both scenarios below are expected to produce an empty landmarks file.
        """
        # Scenario 1: '+' strand regions, 'exon-intron' landmarks.
        plus_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '350', '.', '+', '.', 'gene_name "A";'],
        ]
        plus_landmarks = rnamaps.make_landmarks_file(
            make_file_from_list(plus_rows), 'exon-intron')
        self.assertEqual(make_list_from_file(plus_landmarks), [])

        # Scenario 2: '-' strand regions, 'intron-exon' landmarks.
        minus_rows = [
            ['chr1', '.', 'CDS', '151', '200', '.', '-', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '351', '.', '-', '.', 'gene_name "A";'],
        ]
        minus_landmarks = rnamaps.make_landmarks_file(
            make_file_from_list(minus_rows), 'intron-exon')
        self.assertEqual(make_list_from_file(minus_landmarks), [])

Example #2

0

Show file

    def test_only_barcode5_1_mismatch(self):
        """Demultiplex with only barcode5 (first two barcodes), one mismatch."""

        def read_demux(label):
            # Load the demultiplexed output file written for ``label``.
            file_name = 'demux_{}.fastq.gz'.format(label)
            return make_list_from_file(os.path.join(self.dir, file_name))

        demultiplex.run(
            self.fq_fname,
            self.adapter,
            self.barcodes5[:2],
            mismatches=1,
            out_dir=self.dir,
        )

        # Output of the first barcode: a single entry is checked.
        first = read_demux(self.barcodes5[0])
        self.assertEqual(first[0], ['@header1:rbc:GGG/1'])
        self.assertEqual(first[1], [self.entry1.seq[6:-10]])
        self.assertEqual(first[3], [self.entry1.qual[6:-10]])

        # Output of the second barcode: two consecutive entries are checked.
        second = read_demux(self.barcodes5[1])
        self.assertEqual(second[0], ['@header2:rbc:AA'])
        self.assertEqual(second[1], [self.entry2.seq[5:-10]])
        self.assertEqual(second[3], [self.entry2.qual[5:-10]])
        self.assertEqual(second[4], ['@header3:rbc:TT'])
        self.assertEqual(second[5], [self.entry3.seq[5:-10]])
        self.assertEqual(second[7], [self.entry3.qual[5:-10]])

        # The 'nomatch5' output must be empty.
        unmatched = read_demux('nomatch5')
        self.assertEqual(unmatched, [])

Example #3

0

Show file

    def test_basic(self):
        """Check the single landmark produced for a CDS/intron pair on each strand."""
        # '+' strand regions, 'exon-intron' landmarks.
        plus_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '351', '.', '+', '.', 'gene_name "A";'],
        ]
        plus_landmarks = rnamaps.make_landmarks_file(
            make_file_from_list(plus_rows), 'exon-intron')
        expected_plus = [['chr1', '200', '201', 'A', '.', '+']]
        self.assertEqual(make_list_from_file(plus_landmarks), expected_plus)

        # '-' strand regions, 'intron-exon' landmarks.
        minus_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '-', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '351', '.', '-', '.', 'gene_name "A";'],
        ]
        minus_landmarks = rnamaps.make_landmarks_file(
            make_file_from_list(minus_rows), 'intron-exon')
        expected_minus = [['chr1', '199', '200', 'A', '.', '-']]
        self.assertEqual(make_list_from_file(minus_landmarks), expected_minus)

Example #4

0

Show file

File: test_externals.py Project: ulelab/iCount-Mini

 def test_overwrite(self):
     """Run cutadapt with ``overwrite=True`` and check ``self.reads`` was trimmed.

     The first field of the second line of ``self.reads`` is read before and
     after the run; the value read afterwards must equal the earlier one with
     the trailing adapter removed.
     """
     original_seq = make_list_from_file(self.reads)[1][0]
     return_code = cutadapt.run(self.reads, self.adapter, overwrite=True)
     trimmed_seq = make_list_from_file(self.reads)[1][0]
     # Guard: the fixture must actually end with the adapter, otherwise the
     # slicing comparison below would be meaningless.
     self.assertTrue(original_seq.endswith(self.adapter))
     self.assertEqual(original_seq[:-(len(self.adapter))], trimmed_seq)
     # The return code is asserted once (the original repeated this line).
     self.assertEqual(return_code, 0)

Example #5

0

Show file

File: test_externals.py Project: ulelab/iCount-Mini

 def test_simple(self):
     """Run cutadapt into a separate output file and compare with the input."""
     exit_status = cutadapt.run(
         self.reads,
         self.adapter,
         reads_trimmed=self.tmp,
         qual_trim=0,
         minimum_length=20,
     )
     untrimmed = make_list_from_file(self.reads)[1][0]
     trimmed = make_list_from_file(self.tmp)[1][0]
     adapter_length = len(self.adapter)

     # The untrimmed value ends with the adapter; the trimmed one lacks it.
     self.assertTrue(untrimmed.endswith(self.adapter))
     self.assertEqual(untrimmed[:-adapter_length], trimmed)
     self.assertEqual(exit_status, 0)

Example #6

0

Show file

    def get_summary_reports(self, annotation, cross_links):
        """Run the summary pipeline on list inputs and return its three reports.

        ``annotation`` and ``cross_links`` are written to files with
        ``make_file_from_list``. The returned list holds the tab-separated
        contents of the type, subtype and gene summary files, in that order.
        """
        annotation_path = make_file_from_list(annotation)
        cross_links_path = make_file_from_list(cross_links)

        segment.summary_templates(annotation_path, self.out_dir)
        summary.summary_reports(annotation_path, cross_links_path, self.out_dir, self.out_dir)

        report_names = (
            segment.SUMMARY_TYPE,
            segment.SUMMARY_SUBTYPE,
            segment.SUMMARY_GENE,
        )
        return [
            make_list_from_file(os.path.join(self.out_dir, report_name), '\t')
            for report_name in report_names
        ]

Example #7

0

Show file

File: test_region.py Project: yx-xu/iCount

    def test_templates1(self):
        """Check the type, subtype and gene templates written for a small segmentation."""
        out_dir = get_temp_dir()
        segmentation_rows = [
            ['1', '.', 'intergenic', '1', '10', '.', '+', '.', 'gene_id ".";'],
            ['1', '.', 'UTR3', '11', '20', '.', '+', '.',
             'biotype "mRNA";gene_name "ABC";gene_id "G1";'],
            ['1', '.', 'intron', '21', '30', '.', '+', '.',
             'biotype "lncRNA";gene_name "ABC";gene_id "G1";'],
            ['1', '.', 'CDS', '31', '40', '.', '+', '.',
             'biotype "mRNA";gene_name "DEF";gene_id "G2";'],
            ['1', '.', 'intron', '41', '50', '.', '+', '.',
             'biotype "sRNA,lncRNA";gene_name "DEF"; gene_id "G2";'],
        ]
        segmentation = make_file_from_list(segmentation_rows)
        region.summary_templates(segmentation, out_dir)

        # Template by region type.
        type_path = os.path.join(out_dir, region.TEMPLATE_TYPE)
        results_type = make_list_from_file(type_path, '\t')
        expected_type = [
            ['CDS', '10'],
            ['UTR3', '10'],
            ['intron', '20'],
            ['intergenic', '10'],
        ]
        self.assertEqual(results_type, expected_type)

        # Template by region subtype.
        subtype_path = os.path.join(out_dir, region.TEMPLATE_SUBTYPE)
        results_subtype = make_list_from_file(subtype_path, fields_separator='\t')
        expected_subtype = [
            ['CDS mRNA', '10'],
            ['UTR3 mRNA', '10'],
            ['intron lncRNA', '15'],
            ['intron sRNA', '5'],
            ['intergenic', '10'],
        ]
        self.assertEqual(results_subtype, expected_subtype)

        # Template by gene.
        gene_path = os.path.join(out_dir, region.TEMPLATE_GENE)
        results_gene = make_list_from_file(gene_path, fields_separator='\t')
        expected_gene = [
            ['.', '', '10'],
            ['G1', 'ABC', '20'],
            ['G2', 'DEF', '20'],
        ]
        self.assertEqual(results_gene, expected_gene)

Example #8

0

Show file

File: test_rnamaps.py Project: dblenkus/iCount

    def test_explicit_whole_in(self):
        """
        Reads lie within a single transcript and cross the exon-intron
        landmark (explicit case). Three reads are supplied, giving two
        different cross-links; one cross-link has two distinct randomers.
        """
        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 0, 0, 140, 255, [(0, 50)], {'NH': 1}),
            ('name2:rbc:AAAA', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
            ('name2:rbc:CCCC', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        expected = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['UTR5-intron', '-10', '1', '1'],
            ['UTR5-intron', '-8', '2', '2'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))

Example #9

0

Show file

File: test_rnamaps.py Project: dblenkus/iCount

    def test_implicit_exons(self):
        """
        The single read lies within one transcript and one segment; that
        segment is of EXON_TYPE and is a "middle" segment of the transcript.
        """
        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 0, 0, 205, 255, [(0, 20)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        expected = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['CDS-UTR3', '-25', '0.25', '0'],
            ['CDS-intron', '-25', '0.25', '0'],
            ['UTR5-CDS', '5', '0.25', '0'],
            ['intron-CDS', '5', '0.25', '0'],
        ]

        rnamaps.run(
            bam,
            self.gtf,
            self.out,
            self.strange,
            self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))

Example #10

0

Show file

File: test_rnamaps.py Project: yuanchuntian/iCount

    def test_implicit_inter_tr(self):
        """
        The read lies within a single transcript and a single segment, but
        that segment borders on intergenic (downstream).
        """
        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 0, 0, 610, 255, [(0, 30)], {'NH': 1}),
        ]
        bam_spec = {'chromosomes': [('1', 1000)], 'segments': reads}
        bam = make_bam_file(bam_spec, rnd_seed=0)

        expected = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['CDS-CDS', '-40', '0.3333', '0'],
            ['CDS-intron', '-40', '0.3333', '0'],
            ['intergenic-CDS', '10', '0.3333', '0'],
        ]

        rnamaps.run(
            bam,
            self.gtf,
            self.out,
            self.strange,
            self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))

Example #11

0

Show file

File: test_segment.py Project: yx-xu/iCount

    def test_complement(self):
        """Check the '+' strand intergenic intervals returned by ``segment._complement``."""
        chromosome_sizes = [
            ['1', '2000'],
            ['2', '1000'],
            ['MT', '500'],
        ]
        genome_file = make_file_from_list(chromosome_sizes, bedtool=False)

        gene_rows = [
            ['1', '.', 'gene1', '200', '400', '.', '+', '.', '.'],
            ['1', '.', 'gene2', '300', '600', '.', '+', '.', '.'],
            ['1', '.', 'gene3', '200', '500', '.', '+', '.', '.'],
            ['2', '.', 'gene4', '100', '200', '.', '+', '.', '.'],
            ['2', '.', 'gene5', '100', '300', '.', '-', '.', '.'],
        ]
        genes = list_to_intervals(gene_rows)

        complement_file = segment._complement(genes, genome_file, '+')
        complement = make_list_from_file(complement_file, fields_separator='\t')

        # Attribute column template; only the ID suffix differs between rows.
        empty_col8 = 'ID "inter%s"; gene_id "."; transcript_id ".";'
        expected = [
            ['1', '.', 'intergenic', '1', '199', '.', '+', '.', empty_col8 % "P00000"],
            ['1', '.', 'intergenic', '601', '2000', '.', '+', '.', empty_col8 % "P00001"],
            ['2', '.', 'intergenic', '1', '99', '.', '+', '.', empty_col8 % "P00002"],
            ['2', '.', 'intergenic', '201', '1000', '.', '+', '.', empty_col8 % "P00003"],
            ['MT', '.', 'intergenic', '1', '500', '.', '+', '.', empty_col8 % "P00004"],
        ]

        self.assertEqual(complement, expected)

Example #12

0

Show file

File: test_clusters.py Project: matteofloris/iCount

    def test_clusters(self):
        """Run ``clusters.run`` with ``dist=3, slop=2`` and check the clusters file."""
        site_rows = [
            ['1', '1', '2', '.', '1', '+'],
            ['1', '2', '3', '.', '1', '+'],
            ['1', '3', '4', '.', '1', '+'],
            ['1', '4', '5', '.', '2', '+'],
            ['1', '4', '5', '.', '1', '-'],
            ['1', '5', '6', '.', '1', '+'],
            ['1', '6', '7', '.', '1', '-'],
            ['1', '7', '8', '.', '1', '-'],
            ['1', '10', '11', '.', '1', '+'],
            ['1', '11', '12', '.', '2', '+'],
            ['1', '12', '13', '.', '1', '+'],
        ]
        peak_rows = [
            ['1', '4', '5', 'cl1', '1', '+'],
            ['1', '4', '5', 'cl2', '1', '-'],
            ['1', '5', '6', 'cl3', '1', '+'],
            ['1', '11', '12', 'cl4', '2', '+'],
        ]
        fin_sites = make_file_from_list(site_rows)
        fin_peaks = make_file_from_list(peak_rows)
        fout_clusters = get_temp_file_name()

        clusters.run(fin_sites, fin_peaks, fout_clusters, dist=3, slop=2)
        observed = make_list_from_file(fout_clusters, fields_separator='\t')

        expected = [
            ['1', '2', '6', 'cl1,cl3', '5', '+'],
            ['1', '4', '7', 'cl2', '2', '-'],
            ['1', '10', '13', 'cl4', '4', '+'],
        ]
        self.assertEqual(expected, observed)

Example #13

0

Show file

File: test_files.py Project: ulelab/iCount-Mini

 def test_bed2bedgraph_params(self):
     """
     Convert BED to bedGraph with every track option set explicitly.
     """
     track_options = dict(
         name='Sample name',
         description='A long and detailed description.',
         visibility='full',
         priority=20,
         color='256,0,0',
         alt_color='0,256,0',
         max_height_pixels='100:50:0',
     )
     iCount.files.bedgraph.bed2bedgraph(self.bed, self.bedgraph, **track_options)

     # Track header line expected for the options above.
     track_line = (
         'track type=bedGraph name="Sample name" description="A long and detailed description."'
         ' visibility=full priority=20 color=256,0,0 altColor=0,256,0 maxHeightPixels=100:50:0'
     )
     expected = [
         [track_line],
         ['1', '4', '5', '+5'],
         ['1', '5', '6', '+1'],
         ['1', '5', '6', '-1'],
         ['2', '5', '6', '+3'],
     ]
     observed = make_list_from_file(self.bedgraph, fields_separator='\t')
     self.assertEqual(observed, expected)

Example #14

0

Show file

def template(cross_links, annotation, subtype='biotype',
             excluded_types=None):
    """
    Test helper that runs ``annotate.annotate_cross_links`` on list inputs.

    The inputs are given as lists rather than file paths; temporary files are
    created from them on the fly, so tests need neither many small fixture
    files nor one large shared file (which would go against test isolation).

    See any test that calls this function for a usage example.

    Parameters
    ----------
    cross_links : list
        List representation of cross-links file.
    annotation : list
        List representation of annotation file.
    subtype : str
        Passed through to ``annotate.annotate_cross_links``.
    excluded_types : object
        Passed through to ``annotate.annotate_cross_links``.

    Returns
    -------
    list
        List representation of the output file, read tab-separated.

    """
    bed_in = make_file_from_list(cross_links, extension='bed.gz')
    gtf_in = make_file_from_list(annotation, extension='gtf.gz')
    bed_out = get_temp_file_name(extension='bed.gz')
    annotate.annotate_cross_links(
        gtf_in,
        bed_in,
        bed_out,
        subtype=subtype,
        excluded_types=excluded_types,
    )
    return make_list_from_file(bed_out, fields_separator='\t')

Example #15

0

Show file

File: test_rnamaps.py Project: dblenkus/iCount

    def test_negative_strand(self):
        """
        Run rnamaps on a copy of ``self.gtf_data`` whose strand column is
        replaced with '-', using a single read with flag 16.
        """
        # Replace column 7 (index 6) of every annotation row with '-'.
        negative_rows = [
            row[:6] + ['-'] + row[7:] for row in intervals_to_list(self.gtf_data)
        ]
        gtf_neg = make_file_from_list(negative_rows)

        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 16, 0, 549, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        expected = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['CDS-intergenic', '20', '0.5', '0'],
            ['intergenic-CDS', '-80', '0.5', '0'],
        ]

        rnamaps.run(
            bam,
            gtf_neg,
            self.out,
            self.strange,
            self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))

Example #16

0

Show file

File: test_rnamaps.py Project: yuanchuntian/iCount

    def test_normalisation(self):
        """Check the normalization table written by ``rnamaps.make_normalization``."""
        norm_file = get_temp_file_name(extension='txt')
        rnamaps.make_normalization(self.gtf, norm_file)

        # (RNAmap type, first distance, last distance); every distance in the
        # inclusive range is expected once, with a segment count of '1'.
        distance_ranges = [
            ('CDS-UTR3', -1, 1),
            ('CDS-intron', -1, 2),
            ('integrenic-CDS', -2, 0),
            ('intron-UTR3', -3, 1),
            ('intron-ncRNA', -1, 0),
            ('ncRNA-integrenic', -1, 1),
            ('ncRNA-intron', -2, 0),
            ('ncRNA-ncRNA', -2, 0),
        ]
        expected = [['RNAmap_type', 'distance', 'segments']]
        for map_type, first, last in distance_ranges:
            for distance in range(first, last + 1):
                expected.append([map_type, str(distance), '1'])

        self.assertEqual(expected, make_list_from_file(norm_file))

Example #17

0

Show file

File: test_rnamaps.py Project: dblenkus/iCount

    def test_implicit_whole_in(self):
        """
        Reads lie within a single transcript and a single segment, which is
        the "middle" segment of the transcript. Three reads are supplied,
        giving two different cross-links; one cross-link has two distinct
        randomers.
        """
        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 0, 0, 160, 255, [(0, 30)], {'NH': 1}),
            ('name2:rbc:CCCC', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
            ('name2:rbc:GGGG', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        expected = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['UTR5-intron', '10', '1', '0'],
            ['UTR5-intron', '13', '2', '0'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))

Example #18

0

Show file

File: test_rnamaps.py Project: dblenkus/iCount

    def test_cross_transcript_read(self):
        """
        The read is half in a transcript region and half in intergenic; the
        ``self.cross_tr`` output file is checked.
        """
        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 0, 0, 235, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        header = ['chrom', 'strand', 'xlink', 'second-start', 'end-position', 'read_len']
        expected = [
            header,
            ['1', '+', '234', '0', '284', '50'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.cross_tr))

Example #19

0

Show file

File: test_rnamaps.py Project: dblenkus/iCount

    def test_implicit_intergenic(self):
        """
        The whole read lies in intergenic.
        """
        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 0, 0, 530, 255, [(0, 30)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        expected = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['CDS-intergenic', '30', '0.5', '0'],
            ['intergenic-CDS', '-70', '0.5', '0'],
        ]

        rnamaps.run(
            bam,
            self.gtf,
            self.out,
            self.strange,
            self.cross_tr,
            mismatches=1,
            implicit_handling='split',
        )
        self.assertEqual(expected, make_list_from_file(self.out))

Example #20

0

Show file

File: test_rnamaps.py Project: dblenkus/iCount

    def test_explicit_intergenic_right(self):
        """
        The read is half in a transcript region and half in intergenic; the
        ``self.out`` RNA-map file is checked.
        """
        reads = [
            # (qname, flag, refname, pos, mapq, cigar, tags)
            ('name2:rbc:CCCC', 0, 0, 480, 255, [(0, 50)], {'NH': 1}),
        ]
        bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

        expected = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['CDS-intergenic', '-20', '1', '1'],
        ]

        rnamaps.run(
            bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
        self.assertEqual(expected, make_list_from_file(self.out))

Example #21

0

Show file

    def test_run(self):
        """Run ``peaks.run`` and check both the peaks file and the scores file."""
        annotation_rows = [
            ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_name "A"; gene_id "1";'],
            ['1', '.', 'transcript', '10', '20', '.', '+', '.', 'gene_name "B"; gene_id "1";'],
            ['2', '.', 'CDS', '10', '20', '.', '+', '.', 'gene_name "C"; gene_id "1";'],
        ]
        fin_annotation = make_file_from_list(annotation_rows)

        site_rows = [
            ['1', '14', '15', '.', '3', '+'],
            ['1', '16', '17', '.', '5', '+'],
            ['2', '16', '17', '.', '5', '+'],
        ]
        fin_sites = make_file_from_list(site_rows)

        fout_peaks = get_temp_file_name(extension='.bed.gz')
        fout_scores = get_temp_file_name(extension='.tsv.gz')

        peaks.run(fin_annotation, fin_sites, fout_peaks, scores=fout_scores)

        out_peaks = make_list_from_file(fout_peaks, fields_separator='\t')
        # The first row of the scores file is a header and is dropped.
        out_scores = make_list_from_file(fout_scores, fields_separator='\t')[1:]

        expected_peaks = [
            ['1', '14', '15', 'A-1', '3', '+'],
            ['1', '16', '17', 'A-1', '5', '+'],
        ]
        expected_scores = [
            ['1', '14', '+', 'A', '1', '3', '8', '0.036198'],
            ['1', '16', '+', 'A', '1', '5', '8', '0.036198'],
            ['2', '16', '+', 'not_annotated', 'not_annotated', '5', 'not_calculated', '1'],
        ]

        self.assertEqual(out_peaks, expected_peaks)
        self.assertEqual(out_scores, expected_scores)

Example #22

0

Show file

File: test_region.py Project: yx-xu/iCount

    def test_basic(self):
        """Check the border intervals built by ``region.construct_borders``."""
        attrs = 'biotype "A"; gene_name "X";'
        segmentation = [
            # First group of '+' strand rows
            ['1', '.', 'ncRNA', '1', '10', '.', '+', '.', attrs],
            ['1', '.', 'intron', '11', '20', '.', '+', '.', attrs],
            ['1', '.', 'CDS', '21', '30', '.', '+', '.', attrs],
            ['1', '.', 'UTR3', '31', '40', '.', '+', '.', attrs],
            # Second group of '+' strand rows
            ['1', '.', 'CDS', '5', '14', '.', '+', '.', attrs],
            ['1', '.', 'intron', '15', '24', '.', '+', '.', attrs],
            ['1', '.', 'CDS', '25', '34', '.', '+', '.', attrs],
            # Also negative strand:
            ['1', '.', 'CDS', '3', '32', '.', '-', '.', attrs],
        ]
        expected = [
            ['1', '0', '4', '.', '.', '+'],
            ['1', '4', '10', '.', '.', '+'],
            ['1', '10', '14', '.', '.', '+'],
            ['1', '14', '20', '.', '.', '+'],
            ['1', '20', '24', '.', '.', '+'],
            ['1', '24', '30', '.', '.', '+'],
            ['1', '30', '34', '.', '.', '+'],
            ['1', '34', '40', '.', '.', '+'],
            ['1', '2', '32', '.', '.', '-'],
        ]

        def sort_key(row):
            # Order rows by chrom, strand, start, stop.
            return (row[0], row[-1], int(row[1]), int(row[2]))

        segmentation_file = make_file_from_list(segmentation)
        borders_file = region.construct_borders(BedTool(segmentation_file))
        results = make_list_from_file(borders_file, fields_separator='\t')
        self.assertEqual(expected, sorted(results, key=sort_key))

Example #23

0

Show file

    def test_all_good(self):
        """Check the regions returned by ``segment.get_regions`` for one two-exon transcript."""
        gene_attrs = 'gene_id "G2";'
        transcript_attrs = 'gene_id "G2"; transcript_id "T3";'
        gtf_in_data = list_to_intervals([
            ['1', '.', 'gene', '400', '500', '.', '+', '.', gene_attrs],
            ['1', '.', 'transcript', '400', '500', '.', '+', '.', transcript_attrs],
            ['1', '.', 'exon', '400', '430', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3"; exon_number "1"'],
            ['1', '.', 'CDS', '410', '430', '.', '+', '.', transcript_attrs],
            ['1', '.', 'exon', '470', '500', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3"; exon_number "2"'],
            ['1', '.', 'CDS', '470', '490', '.', '+', '.', transcript_attrs],
        ])
        gtf_in_file = make_file_from_list(intervals_to_list(gtf_in_data))

        # Only the name of the temporary output file is needed, so the handle
        # is closed right away; delete=False keeps the file on disk.
        with tempfile.NamedTemporaryFile(mode='w+', delete=False) as gtf_out:
            pass

        chromosome_sizes = [
            ['1', '2000'],
            ['MT', '500'],
        ]
        genome_file = make_file_from_list(chromosome_sizes, bedtool=False)

        regions_file = segment.get_regions(gtf_in_file, gtf_out.name, genome_file)
        gtf_out_data = list_to_intervals(
            make_list_from_file(regions_file, fields_separator='\t'))

        intergenic_attrs = 'gene_id "."; transcript_id ".";'
        merged_attrs = 'gene_id "G2";transcript_id "T3"; biotype ".";'
        expected = list_to_intervals([
            ['1', '.', 'intergenic', '1', '399', '.', '+', '.', intergenic_attrs],
            ['1', '.', 'intergenic', '1', '2000', '.', '-', '.', intergenic_attrs],
            ['1', '.', 'transcript', '400', '500', '.', '+', '.', merged_attrs],
            ['1', '.', 'UTR5', '400', '409', '.', '+', '.',
             'gene_id "G2";exon_number "1";transcript_id "T3"; biotype ".";'],
            ['1', '.', 'gene', '400', '500', '.', '+', '.',
             'gene_id "G2"; biotype "[.]";'],
            ['1', '.', 'CDS', '410', '430', '.', '+', '.', merged_attrs],
            ['1', '.', 'intron', '431', '469', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3"; biotype ".";'],
            ['1', '.', 'CDS', '470', '490', '.', '+', '.', merged_attrs],
            ['1', '.', 'UTR3', '491', '500', '.', '+', '.',
             'gene_id "G2";exon_number "2";transcript_id "T3"; biotype ".";'],
            ['1', '.', 'intergenic', '501', '2000', '.', '+', '.', intergenic_attrs],
            ['MT', '.', 'intergenic', '1', '500', '.', '+', '.', intergenic_attrs],
            ['MT', '.', 'intergenic', '1', '500', '.', '-', '.', intergenic_attrs],
        ])

        self.assertEqual(expected, gtf_out_data)

Example #24

0

Show file

File: test_bed.py Project: nebo56ucl/iCount

def merge_bed_wrapper(data):
    """
    Run ``merge_bed`` on list inputs and return the result as a list.

    Each item of ``data`` is written to its own file with
    ``make_file_from_list``. The value returned by ``merge_bed`` is then read
    back tab-separated with ``make_list_from_file``.
    """
    files = [make_file_from_list(file_) for file_ in data]
    # Only the path is needed: close the handle explicitly instead of leaving
    # it to garbage collection. delete=False keeps the file on disk.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        out_file = handle.name
    return make_list_from_file(merge_bed(out_file, files),
                               fields_separator='\t')

Example #25

0

Show file

def merge_bed_wrapper(data):
    """
    Run ``merge_bed`` on list inputs and return the output file as a list.

    Each item of ``data`` is written to its own file with
    ``make_file_from_list``; the merged output file is read back
    tab-separated.
    """
    input_paths = [make_file_from_list(rows) for rows in data]
    merged_path = get_temp_file_name()
    merge_bed(merged_path, input_paths)
    return make_list_from_file(merged_path, fields_separator='\t')

Example #26

0

Show file

File: test_files.py Project: bakerwm/iCount

 def test_bed2bedgraph(self):
     """Convert BED to bedGraph with default options and check the output."""
     iCount.files.bedgraph.bed2bedgraph(self.bed, self.bedgraph)

     # Track header line expected when no options are passed.
     track_line = 'track type=bedGraph name="User Track" description="User Supplied Track"'
     expected = [
         [track_line],
         ['1', '4', '5', '+5'],
         ['1', '5', '6', '+1'],
         ['1', '5', '6', '-1'],
         ['2', '5', '6', '+3'],
     ]
     observed = make_list_from_file(self.bedgraph, fields_separator='\t')
     self.assertEqual(observed, expected)

Example #27

0

Show file

File: test_region.py Project: yx-xu/iCount

    def test_basic(self):
        """Check the rows that ``region.merge_regions`` writes to ``self.tmp``.

        The first eight columns of each row are compared directly. The
        attribute column is compared after sorting its ';'-separated parts,
        because the order of attributes can be arbitrary.
        """
        # seg is compositon of BED6 and GTF interval:
        nonmerged = make_file_from_list([
            [
                '1', '.', 'UTR3', '1', '10', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '11', '20', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '21', '30', '.', '+', '.',
                'biotype "lncRNA";gene_id "id2";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '-', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
        ])

        expected = [
            [
                '1', '.', 'UTR3', '1', '20', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '21', '30', '.', '+', '.',
                'biotype "lncRNA";gene_id "id2";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '+', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
            [
                '1', '.', 'UTR3', '31', '40', '.', '-', '.',
                'biotype "lncRNA";gene_id "id1";'
            ],
        ]

        region.merge_regions(nonmerged, self.tmp)
        results = make_list_from_file(self.tmp, fields_separator='\t')
        # zip() stops at the shorter input, so without this check a truncated
        # or empty output file would pass the row-by-row comparison below.
        self.assertEqual(len(results), len(expected))
        # Since order of attrs can be arbitrary, equality checks are more complex:
        for res, exp in zip(results, expected):
            self.assertEqual(res[:8], exp[:8])
            self.assertEqual(
                ';'.join(sorted(res[8].split(';'))),
                ';'.join(sorted(exp[8].split(';'))),
            )

Example #28

0

Show file

File: test_landmark.py Project: yx-xu/iCount

    def test_basic(self):
        """Check the landmarks written for a CDS / intron / CDS sequence of regions."""
        region_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '400', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'CDS', '401', '600', '.', '+', '.', 'gene_name "A";'],
        ]
        regions = make_file_from_list(region_rows)

        landmarks = get_temp_file_name(extension='bed')
        landmark.make_landmarks(regions, landmarks)

        expected = [
            ['chr1', '200', '201', 'exon-intron;A', '.', '+'],
            ['chr1', '400', '401', 'intron-exon;A', '.', '+'],
        ]
        self.assertEqual(make_list_from_file(landmarks), expected)

Example #29

0

Show file

File: test_files.py Project: bakerwm/iCount

 def test_fastq_file_write(self):
     """Write two entries through ``FastqFile`` and read the file back line by line."""
     entries = [
         ['@header1', 'AAA', '+', 'FFF'],
         ['@header2', 'AAAA', '+', 'FFFF'],
     ]
     fq_file_name = get_temp_file_name(extension='fq.gz')
     writer = iCount.files.fastq.FastqFile(fq_file_name, 'wt')
     for fields in entries:
         writer.write(iCount.files.fastq.FastqEntry(*fields))
     writer.close()

     observed = make_list_from_file(fq_file_name)
     # Each field of each entry is expected on its own line.
     expected = [
         ['@header1'], ['AAA'], ['+'], ['FFF'],
         ['@header2'], ['AAAA'], ['+'], ['FFFF'],
     ]
     self.assertEqual(observed, expected)

Example #30

0

Show file

File: test_files.py Project: bakerwm/iCount

    def test_bed2bedgraph_params(self):
        """
        Convert BED to bedGraph with custom ``name`` and ``description``.

        The supplied ``name`` is longer than 15 characters; the expected
        track line contains only its first 15 characters.
        """
        iCount.files.bedgraph.bed2bedgraph(
            self.bed,
            self.bedgraph,
            name='Longer than 15 chars.',
            description='Custom text.',
        )

        track_line = 'track type=bedGraph name="Longer than 15 " description="Custom text."'
        expected = [
            [track_line],
            ['1', '4', '5', '+5'],
            ['1', '5', '6', '+1'],
            ['1', '5', '6', '-1'],
            ['2', '5', '6', '+3'],
        ]
        observed = make_list_from_file(self.bedgraph, fields_separator='\t')
        self.assertEqual(observed, expected)