Ejemplo n.º 1
0
    def test_run_simple(self):
        """Run ``xlsites.run`` on a BAM file made from ``self.data`` and check its counters."""
        bam_path = make_bam_file(self.data)
        unique_out = get_temp_file_name(extension='.bed.gz')
        multi_out = get_temp_file_name(extension='.bed.gz')
        strange_out = get_temp_file_name(extension='.bam.gz')

        metrics = xlsites.run(
            bam_path, unique_out, multi_out, strange_out, mapq_th=5, report_progress=True)

        # Expected value of each attribute of the returned object, checked in this order.
        expected_metrics = [
            ('all_recs', 6),
            ('notmapped_recs', 1),  # Unmapped records.
            ('mapped_recs', 5),  # Mapped records.
            ('lowmapq_recs', 1),  # Records with too low quality.
            ('used_recs', 4),  # Records used in analysis.
            ('invalidrandomer_recs', 1),  # Records with invalid randomers.
            ('norandomer_recs', 1),  # Records with no randomers.
            ('bc_cn', {'': 2, 'ACG': 1, 'CCCC': 1}),  # Barcode counter.
            ('strange_recs', 1),  # Strange counter.
        ]
        for attribute, expected in expected_metrics:
            self.assertEqual(getattr(metrics, attribute), expected)
Ejemplo n.º 2
0
    def test_bed2bedgraph(self):
        """Run ``iCount bedgraph`` with minimal and with full options; both must exit with 0."""
        # Suppress messages lower than ERROR.
        quiet = ['-S', '40']

        minimal_cmd = [
            'iCount', 'bedgraph', self.cross_links,
            get_temp_file_name(extension='bedgraph'),
        ] + quiet

        full_cmd = [
            'iCount', 'bedgraph', self.cross_links,
            get_temp_file_name(extension='bedgraph'),
            '--name', 'Name.',
            '--description', 'Description.',
            '--visibility', 'full',
            '--priority', '20',
            '--color', '256,0,0',
            '--alt_color', '0,256,0',
            '--max_height_pixels', '100:50:0',
        ] + quiet

        for cmd in (minimal_cmd, full_cmd):
            self.assertEqual(subprocess.call(cmd), 0)
Ejemplo n.º 3
0
    def test_e2e(self):
        """
        From raw reads and ENSEMBL annotation to rnamaps.
        """
        # Segment the annotation and derive the regions file from it.
        segmentation = get_temp_file_name(extension='gtf')
        regions_dir = get_temp_dir()
        iCount.genomes.segment.get_segments(self.gtf, segmentation, self.fai)
        iCount.genomes.region.make_regions(segmentation, regions_dir)
        regions_file = os.path.join(regions_dir, iCount.genomes.region.REGIONS_FILE)

        # Build the STAR index; a zero return code is required.
        index_dir = get_temp_dir()
        build_status = iCount.externals.star.build_index(
            self.fasta, index_dir, annotation=self.gtf)
        self.assertEqual(build_status, 0)

        # Map the reads against that index; again a zero return code is required.
        mapping_dir = get_temp_dir()
        map_status = iCount.externals.star.map_reads(
            self.reads, index_dir, out_dir=mapping_dir, annotation=self.gtf)
        self.assertEqual(map_status, 0)

        # Take the first listed file in the mapping directory named 'Aligned*'.
        aligned = [name for name in os.listdir(mapping_dir) if name.startswith('Aligned')]
        bam_path = os.path.join(mapping_dir, aligned[0])

        unique_sites = get_temp_file_name(extension='bed.gz')
        multi_sites = get_temp_file_name(extension='bed.gz')
        skipped_reads = get_temp_file_name(extension='bam')
        iCount.mapping.xlsites.run(bam_path, unique_sites, multi_sites, skipped_reads)

        iCount.analysis.rnamaps.run(unique_sites, regions_file)
0
 def setUp(self):
     """Create the FASTQ input via ``make_fastq_file`` and reserve a temporary output name."""
     self.adapter = 'AAAATTTTCCCCGGGG'
     reads_path = get_temp_file_name(extension='fastq')
     self.reads = make_fastq_file(
         adapter=self.adapter, num_sequences=100, out_file=reads_path)
     self.tmp = get_temp_file_name(extension='fastq')
     # ResourceWarning is ignored from this point on.
     warnings.simplefilter("ignore", ResourceWarning)
Ejemplo n.º 5
0
    def setUp(self):
        """Prepare temporary output names/directories and the input fixtures kept on ``self``."""
        warnings.simplefilter("ignore", ResourceWarning)

        # Temporary file names and directories to use for output:
        self.tmp1, self.tmp2 = get_temp_file_name(), get_temp_file_name()
        self.dir, self.dir2 = get_temp_dir(), get_temp_dir()

        cross_link_rows = [
            ['1', '16', '17', '.', '5', '+'],
            ['1', '14', '15', '.', '5', '+'],
            ['1', '15', '16', '.', '5', '+'],
        ]
        self.cross_links = make_file_from_list(cross_link_rows, extension='bed')

        self.peaks = make_file_from_list([['1', '15', '16', '.', '15', '+']])

        annotation_rows = [
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'ncRNA', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "B";'],
            ['1', '.', 'CDS', '10', '20', '.', '-', '.', 'biotype "C";'],
            ['1', '.', 'CDS', '12', '18', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '30', '40', '.', '+', '.', 'biotype "D";'],
        ]
        self.annotation = make_file_from_list(annotation_rows)

        # One gene with a single transcript and a single exon.
        gene_attrs = 'gene_id "A";'
        transcript_attrs = 'gene_id "A"; transcript_id "AA";'
        exon_attrs = 'gene_id "A"; transcript_id "AA"; exon_number "1";'
        self.gtf = make_file_from_list([
            ['1', '.', 'gene', '10', '20', '.', '+', '.', gene_attrs],
            ['1', '.', 'transcript', '10', '20', '.', '+', '.', transcript_attrs],
            ['1', '.', 'exon', '10', '20', '.', '+', '.', exon_attrs],
        ])

        bam_spec = {
            'chromosomes': [('1', 3000), ('2', 2000)],
            'segments': [
                ('name3:rbc:CCCC:', 0, 0, 100, 20, [(0, 100)], {'NH': 1}),
                ('name4:ABC', 0, 0, 300, 20, [(0, 200)], {'NH': 11}),
            ],
        }
        self.bam = make_bam_file(bam_spec, rnd_seed=0)
Ejemplo n.º 6
0
 def test_plot(self):
     """Plot a 'CDS-intron' RNA-map and check that the image file was created."""
     plot_path = get_temp_file_name(extension='png')
     normalization_path = get_temp_file_name(extension='txt')
     rnamaps.make_normalization(self.gtf, normalization_path)
     # The normalization file is passed both positionally and as `normalization`.
     rnamaps.plot_rna_map(
         normalization_path,
         'CDS-intron',
         normalization=normalization_path,
         outfile=plot_path,
     )
     self.assertTrue(os.path.isfile(plot_path))
Ejemplo n.º 7
0
    def setUp(self):
        """Build the annotation fixture (two genes, three transcripts) and temp file names."""
        warnings.simplefilter("ignore", (ResourceWarning, ImportWarning))

        def row(feature, start, end, attributes):
            """Return one annotation row on chromosome '1', strand '+'."""
            return ['1', '.', feature, start, end, '.', '+', '.', attributes]

        no_gene = 'gene_id "."; transcript_id ".";'
        rows = [
            row('intergenic', '1', '99', no_gene),
            # Gene #1:
            row('gene', '100', '499', 'gene_id "G1";'),
            # Transcript #1
            row('transcript', '100', '249', 'gene_id "G1"; transcript_id "T1";'),
            row('UTR5', '100', '149', 'gene_id "G1"; transcript_id "T1"; exon_number "1";'),
            row('intron', '150', '199', 'gene_id "G1"; transcript_id "T1";'),
            row('CDS', '200', '229', 'gene_id "G1"; transcript_id "T1"; exon_number "2";'),
            row('intron', '230', '239', 'gene_id "G1"; transcript_id "T1";'),
            row('UTR3', '240', '249', 'gene_id "G1"; transcript_id "T1"; exon_number "3";'),
            # Transcript #2
            row('transcript', '240', '499', 'gene_id "G1"; transcript_id "T2";'),
            row('CDS', '240', '299', 'gene_id "G1"; transcript_id "T2"; exon_number "1";'),
            row('intron', '300', '399', 'gene_id "G1"; transcript_id "T2";'),
            row('CDS', '400', '499', 'gene_id "G1"; transcript_id "T2"; exon_number "2";'),
            # Intergenic
            row('intergenic', '500', '599', no_gene),
            # Gene #2:
            row('gene', '600', '999', 'gene_id "G2";'),
            # Transcript #3
            row('transcript', '600', '799', 'gene_id "G2"; transcript_id "T3";'),
            row('CDS', '600', '649', 'gene_id "G2"; transcript_id "T3"; exon_number "1";'),
            row('intron', '650', '749', 'gene_id "G2"; transcript_id "T3";'),
            row('CDS', '750', '799', 'gene_id "G2"; transcript_id "T3"; exon_number "2";'),
        ]
        self.gtf_data = list_to_intervals(rows)
        self.gtf = make_file_from_list(intervals_to_list(self.gtf_data))
        self.strange = get_temp_file_name()
        self.cross_tr = get_temp_file_name()
        self.out = get_temp_file_name()
Ejemplo n.º 8
0
    def test_clusters(self):
        """Run ``clusters.run`` with dist=3, slop=2 and compare its output rows."""
        site_rows = [
            ['1', '1', '2', '.', '1', '+'],
            ['1', '2', '3', '.', '1', '+'],
            ['1', '3', '4', '.', '1', '+'],
            ['1', '4', '5', '.', '2', '+'],
            ['1', '4', '5', '.', '1', '-'],
            ['1', '5', '6', '.', '1', '+'],
            ['1', '6', '7', '.', '1', '-'],
            ['1', '7', '8', '.', '1', '-'],
            ['1', '10', '11', '.', '1', '+'],
            ['1', '11', '12', '.', '2', '+'],
            ['1', '12', '13', '.', '1', '+'],
        ]
        peak_rows = [
            ['1', '4', '5', 'cl1', '1', '+'],
            ['1', '4', '5', 'cl2', '1', '-'],
            ['1', '5', '6', 'cl3', '1', '+'],
            ['1', '11', '12', 'cl4', '2', '+'],
        ]
        sites_file = make_file_from_list(site_rows)
        peaks_file = make_file_from_list(peak_rows)
        clusters_file = get_temp_file_name()

        clusters.run(sites_file, peaks_file, clusters_file, dist=3, slop=2)
        observed = make_list_from_file(clusters_file, fields_separator='\t')

        expected = [
            ['1', '2', '6', 'cl1,cl3', '5', '+'],
            ['1', '4', '7', 'cl2', '2', '-'],
            ['1', '10', '13', 'cl4', '4', '+'],
        ]
        self.assertEqual(expected, observed)
Ejemplo n.º 9
0
    def test_barcode_size_diff(self):
        """Extract reads when the two barcodes have different lengths (6 and 7)."""
        barcodes = ['NAAANN', 'NNCCCNN']
        adapter = 'CCCCCC'
        # Two records laid out flat: header, sequence, '+', quality.
        data = [
            '@header1',
            'TACATT' + adapter + make_sequence(40, rnd_seed=0),
            '+',
            make_quality_scores(50, rnd_seed=0) + '!J',
            '@header2',
            'AACCCTT' + adapter + make_sequence(39, rnd_seed=0),
            '+',
            make_quality_scores(50, rnd_seed=0) + '!J',
        ]
        fastq_path = get_temp_file_name(extension='fq')
        writer = iCount.files.fastq.FastqFile(fastq_path, 'wt')
        for start in (0, 4):
            writer.write(iCount.files.fastq.FastqEntry(*data[start:start + 4]))
        writer.close()

        extracted = demultiplex._extract(fastq_path, barcodes, mismatches=1)

        # (index of the record's header in `data`, experiment id, randomer, barcode length)
        expectations = [
            (0, 0, 'TTT', 6),
            (4, 1, 'AATT', 7),
        ]
        for header_at, experiment, randomer, barcode_len in expectations:
            read, found_experiment, found_randomer = next(extracted)
            self.assertEqual(found_experiment, experiment)
            self.assertEqual(found_randomer, randomer)
            self.assertEqual(read.id, data[header_at])
            # Sequence and quality are expected with the barcode prefix removed.
            self.assertEqual(read.seq, data[header_at + 1][barcode_len:])
            self.assertEqual(read.plus, '+')
            self.assertEqual(read.qual, data[header_at + 3][barcode_len:])
Ejemplo n.º 10
0
def template(cross_links, annotation, subtype='biotype',
             excluded_types=None):
    """
    Utility function for testing iCount.analysis.annotate

    The inputs are given as lists rather than files; temporary files are
    created from them on the fly, so tests need neither many small fixture
    files nor one large shared file.

    Parameters
    ----------
    cross_links : list
        List representation of cross-links file.
    annotation : list
        List representation of annotation file.
    subtype : str
        Forwarded unchanged to ``annotate.annotate_cross_links``.
    excluded_types : optional
        Forwarded unchanged to ``annotate.annotate_cross_links``.

    Returns
    -------
    list
        List representation of the output file, read as tab-separated rows.

    """
    sites_path = make_file_from_list(cross_links, extension='bed.gz')
    annotation_path = make_file_from_list(annotation, extension='gtf.gz')
    annotated_path = get_temp_file_name(extension='bed.gz')
    annotate.annotate_cross_links(
        annotation_path,
        sites_path,
        annotated_path,
        subtype=subtype,
        excluded_types=excluded_types,
    )
    return make_list_from_file(annotated_path, fields_separator='\t')
Ejemplo n.º 11
0
 def test_plot(self):
     """Call ``plot_combined`` on ``self.results_file`` and check the image file exists."""
     image_path = get_temp_file_name(extension='png')
     plot_combined.plot_combined(self.results_file, image_path, top_n=2, nbins=50)
     self.assertTrue(os.path.isfile(image_path))
Ejemplo n.º 12
0
    def test_normalisation(self):
        """Compare the table written by ``rnamaps.make_normalization`` with the expected rows."""
        out_path = get_temp_file_name(extension='txt')
        rnamaps.make_normalization(self.gtf, out_path)

        # (RNA-map type, first distance, last distance); every expected row has segments '1'.
        distance_ranges = [
            ('CDS-UTR3', -1, 1),
            ('CDS-intron', -1, 2),
            ('integrenic-CDS', -2, 0),
            ('intron-UTR3', -3, 1),
            ('intron-ncRNA', -1, 0),
            ('ncRNA-integrenic', -1, 1),
            ('ncRNA-intron', -2, 0),
            ('ncRNA-ncRNA', -2, 0),
        ]
        expected = [['RNAmap_type', 'distance', 'segments']]
        for map_type, first, last in distance_ranges:
            expected.extend(
                [map_type, str(distance), '1'] for distance in range(first, last + 1))

        self.assertEqual(expected, make_list_from_file(out_path))
Ejemplo n.º 13
0
    def test_cutadapt(self):
        """Run ``iCount cutadapt`` with minimal and with full options; both must exit with 0."""
        adapter = 'CCCCCCCCC'
        reads = make_fastq_file(
            adapter=adapter,
            out_file=get_temp_file_name(extension='fastq'),
            rnd_seed=0,
        )

        positional = ['iCount', 'cutadapt', reads, self.tmp1, adapter]
        # Suppress messages lower than ERROR.
        quiet = ['-S', '40']

        minimal_cmd = positional + quiet
        full_cmd = positional + ['--qual_trim', '20', '--minimum_length', '15'] + quiet

        for cmd in (minimal_cmd, full_cmd):
            self.assertEqual(subprocess.call(cmd), 0)
Ejemplo n.º 14
0
 def test_plot(self):
     """Call ``plot_rnaheatmap`` on ``self.results_file`` and check the image file exists."""
     image_path = get_temp_file_name(extension='png')
     plot_rnaheatmap.plot_rnaheatmap(self.results_file, image_path, top_n=2, binsize=10)
     self.assertTrue(os.path.isfile(image_path))
Ejemplo n.º 15
0
    def test_run(self):
        """Run ``peaks.run`` on a small annotation/sites pair and check peaks and scores."""
        annotation_file = make_file_from_list([
            ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_name "A"; gene_id "1";'],
            ['1', '.', 'transcript', '10', '20', '.', '+', '.', 'gene_name "B"; gene_id "1";'],
            ['2', '.', 'CDS', '10', '20', '.', '+', '.', 'gene_name "C"; gene_id "1";'],
        ])
        sites_file = make_file_from_list([
            ['1', '14', '15', '.', '3', '+'],
            ['1', '16', '17', '.', '5', '+'],
            ['2', '16', '17', '.', '5', '+'],
        ])
        peaks_out = get_temp_file_name(extension='.bed.gz')
        scores_out = get_temp_file_name(extension='.tsv.gz')

        peaks.run(annotation_file, sites_file, peaks_out, scores=scores_out)

        found_peaks = make_list_from_file(peaks_out, fields_separator='\t')
        # The first row of the scores file is a header and is dropped.
        found_scores = make_list_from_file(scores_out, fields_separator='\t')[1:]

        self.assertEqual(found_peaks, [
            ['1', '14', '15', 'A-1', '3', '+'],
            ['1', '16', '17', 'A-1', '5', '+'],
        ])
        self.assertEqual(found_scores, [
            ['1', '14', '+', 'A', '1', '3', '8', '0.036198'],
            ['1', '16', '+', 'A', '1', '5', '8', '0.036198'],
            ['2', '16', '+', 'not_annotated', 'not_annotated', '5', 'not_calculated', '1'],
        ])
Ejemplo n.º 16
0
    def create_fq_file(self, entries):
        """
        Write ``entries`` to a new temporary FASTQ file and return its name.

        Parameters
        ----------
        entries : iterable
            Objects accepted by ``FastqFile.write`` (presumably ``FastqEntry``
            instances - confirm against callers).

        Returns
        -------
        Name of the written file, as produced by ``get_temp_file_name``.
        """
        fname = get_temp_file_name(extension='fq')

        fq_file = FastqFile(fname, 'wt')
        try:
            for entry in entries:
                fq_file.write(entry)
        finally:
            # Close the handle even if a write raises, so it is never leaked.
            fq_file.close()
        return fname
Ejemplo n.º 17
0
    def test_xlsites(self):
        """Run ``iCount xlsites`` with minimal and with full options; both must exit with 0."""
        # Output file names, shared by both invocations.
        unique = get_temp_file_name(extension='.bed')
        multi = get_temp_file_name(extension='.bed')
        strange = get_temp_file_name(extension='.bam')

        positional = ['iCount', 'xlsites', self.bam, unique, multi, strange]
        # Suppress messages lower than ERROR.
        quiet = ['-S', '40']

        minimal_cmd = positional + quiet
        full_cmd = positional + [
            '--group_by', 'start',
            '--quant', 'cDNA',
            '--mismatches', '2',
            '--mapq_th', '0',
            '--multimax', '50',
            '--gap_th', '4',
            '--ratio_th', '0.1',
            '--max_barcodes', '10000',
        ] + quiet

        for cmd in (minimal_cmd, full_cmd):
            self.assertEqual(subprocess.call(cmd), 0)
Ejemplo n.º 18
0
    def test_error_open_bamfile(self):
        """
        Provide only a file name with no content - an error should be raised.
        """
        empty_bam = get_temp_file_name()

        with self.assertRaisesRegex(ValueError, r"Error opening BAM file: .*"):
            xlsites._processs_bam_file(empty_bam, self.metrics, 50, self.tmp)
Ejemplo n.º 19
0
    def test_all_good(self):
        """Run ``segment.get_regions`` on a one-gene annotation and compare the intervals."""
        transcript_in = 'gene_id "G2"; transcript_id "T3";'
        input_rows = [
            ['1', '.', 'gene', '400', '500', '.', '+', '.', 'gene_id "G2";'],
            ['1', '.', 'transcript', '400', '500', '.', '+', '.', transcript_in],
            ['1', '.', 'exon', '400', '430', '.', '+', '.', transcript_in + ' exon_number "1"'],
            ['1', '.', 'CDS', '410', '430', '.', '+', '.', transcript_in],
            ['1', '.', 'exon', '470', '500', '.', '+', '.', transcript_in + ' exon_number "2"'],
            ['1', '.', 'CDS', '470', '490', '.', '+', '.', transcript_in],
        ]
        input_intervals = list_to_intervals(input_rows)
        annotation_file = make_file_from_list(intervals_to_list(input_intervals))

        segmented_file = get_temp_file_name()

        genome_path = make_file_from_list([['1', '2000'], ['MT', '500']], bedtool=False)

        segment.get_regions(annotation_file, segmented_file, genome_path)
        observed = list_to_intervals(make_list_from_file(segmented_file, fields_separator='\t'))

        # Attribute strings that occur in several expected rows.
        intergenic = 'gene_id "."; transcript_id ".";'
        transcript_out = 'gene_id "G2";transcript_id "T3"; biotype ".";'
        expected = list_to_intervals([
            ['1', '.', 'intergenic', '1', '399', '.', '+', '.', intergenic],
            ['1', '.', 'intergenic', '1', '2000', '.', '-', '.', intergenic],
            ['1', '.', 'transcript', '400', '500', '.', '+', '.', transcript_out],
            ['1', '.', 'UTR5', '400', '409', '.', '+', '.',
             'gene_id "G2";exon_number "1";transcript_id "T3"; biotype ".";'],
            ['1', '.', 'gene', '400', '500', '.', '+', '.', 'gene_id "G2"; biotype "[.]";'],
            ['1', '.', 'CDS', '410', '430', '.', '+', '.', transcript_out],
            ['1', '.', 'intron', '431', '469', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3"; biotype ".";'],
            ['1', '.', 'CDS', '470', '490', '.', '+', '.', transcript_out],
            ['1', '.', 'UTR3', '491', '500', '.', '+', '.',
             'gene_id "G2";exon_number "2";transcript_id "T3"; biotype ".";'],
            ['1', '.', 'intergenic', '501', '2000', '.', '+', '.', intergenic],
            ['MT', '.', 'intergenic', '1', '500', '.', '+', '.', intergenic],
            ['MT', '.', 'intergenic', '1', '500', '.', '-', '.', intergenic],
        ])

        self.assertEqual(expected, observed)
Ejemplo n.º 20
0
def merge_bed_wrapper(data):
    """
    Run ``merge_bed`` on in-memory data and return the merged content.

    Each element of ``data`` is written to its own temporary file with
    ``make_file_from_list``. The files are merged into a fresh temporary file,
    whose tab-separated content is returned via ``make_list_from_file``.
    """
    input_files = [make_file_from_list(rows) for rows in data]
    merged_path = get_temp_file_name()
    merge_bed(merged_path, input_files)
    return make_list_from_file(merged_path, fields_separator='\t')
Ejemplo n.º 21
0
    def setUp(self):
        """Create the input BED file and reserve a name for the bedgraph output."""
        warnings.simplefilter("ignore", ResourceWarning)

        self.bed = make_file_from_list(
            [
                ['1', '4', '5', '.', '5', '+'],
                ['1', '5', '6', '.', '1', '+'],
                ['1', '5', '6', '.', '1', '-'],
                ['2', '5', '6', '.', '3', '+'],
            ],
            extension='bed',
        )
        self.bedgraph = get_temp_file_name(extension='bedgraph')
Ejemplo n.º 22
0
    def test_basic(self):
        """Run ``landmark.make_landmarks`` on a CDS-intron-CDS gene and compare its rows."""
        region_rows = [
            ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'intron', '201', '400', '.', '+', '.', 'gene_name "A";'],
            ['chr1', '.', 'CDS', '401', '600', '.', '+', '.', 'gene_name "A";'],
        ]
        regions_file = make_file_from_list(region_rows)

        landmarks_file = get_temp_file_name(extension='bed')
        landmark.make_landmarks(regions_file, landmarks_file)

        expected = [
            ['chr1', '200', '201', 'exon-intron;A', '.', '+'],
            ['chr1', '400', '401', 'intron-exon;A', '.', '+'],
        ]
        self.assertEqual(make_list_from_file(landmarks_file), expected)
Ejemplo n.º 23
0
 def test_fastq_file_write(self):
     """Write two entries through ``FastqFile`` and read the file back line by line."""
     records = [
         ['@header1', 'AAA', '+', 'FFF'],
         ['@header2', 'AAAA', '+', 'FFFF'],
     ]
     out_path = get_temp_file_name(extension='fq.gz')
     writer = iCount.files.fastq.FastqFile(out_path, 'wt')
     for fields in records:
         writer.write(iCount.files.fastq.FastqEntry(*fields))
     writer.close()

     written = make_list_from_file(out_path)
     # Every field of every record is expected on a line of its own.
     expected = [
         ['@header1'], ['AAA'], ['+'], ['FFF'],
         ['@header2'], ['AAAA'], ['+'], ['FFFF'],
     ]
     self.assertEqual(written, expected)
Ejemplo n.º 24
0
    def test_rnamaps(self):
        """Run ``iCount rnamaps`` from the command line; it must exit with 0."""
        positional = [
            self.bam,
            self.gtf,
            self.tmp1,
            get_temp_file_name(extension='.bam'),
            self.tmp2,
        ]
        # '-S 40' suppresses messages lower than ERROR.
        command = ['iCount', 'rnamaps'] + positional + ['-S', '40']

        self.assertEqual(subprocess.call(command), 0)
Ejemplo n.º 25
0
    def test_peaks(self):
        """Run ``iCount peaks`` with minimal and with full options; both must exit with 0."""
        # Suppress messages lower than ERROR.
        quiet = ['-S', '40']

        minimal_cmd = [
            'iCount', 'peaks', self.annotation, self.cross_links,
            get_temp_file_name(extension='.bed.gz'),
        ] + quiet

        full_cmd = [
            'iCount', 'peaks', self.annotation, self.cross_links,
            get_temp_file_name(extension='.bed.gz'),
            '--scores', get_temp_file_name(extension='.tsv.gz'),
            '--half_window', '3',
            '--fdr', '0.05',
            '--perms', '10',
            '--rnd_seed', '42',
            '--features', 'gene',
            '--report_progress',
        ] + quiet

        for cmd in (minimal_cmd, full_cmd):
            self.assertEqual(subprocess.call(cmd), 0)
Ejemplo n.º 26
0
    def test_annotation(self):
        """Run ``iCount annotation`` from the command line; it must exit with 0."""
        # Only the full command (with --out_dir) is executed, to avoid downloading to cwd.
        annotation_out = get_temp_file_name(extension='gtf.gz')
        command = [
            'iCount', 'annotation', 'human', '27',
            '--out_dir', self.dir,
            '--annotation', annotation_out,
            '-S', '40',  # Suppress messages lower than ERROR.
        ]

        self.assertEqual(subprocess.call(command), 0)
Ejemplo n.º 27
0
    def setUp(self):
        """Create three FASTQ entries for the barcode sets and write them to a temp file."""
        warnings.simplefilter("ignore", ResourceWarning)

        self.dir = get_temp_dir()
        self.adapter = 'AAAAAAAAAA'
        self.barcodes5 = ['NNAAAN', 'NGGGN', 'NGGGN']
        self.barcodes3 = ['.', 'NNGGG', 'NCCC']

        # Header: early version Illumina header
        # Barcodes: exact match to the barcode set #1
        sequence = 'GGAAAG' + make_sequence(40) + self.adapter
        self.entry1 = FastqEntry('@header1/1', sequence, '+', make_quality_scores(56))

        # Header: contains id and description
        # Barcodes: one mismatch on 5' end for barcode set #2
        sequence = 'AGGTA' + make_sequence(40) + 'AAGGG' + self.adapter
        self.entry2 = FastqEntry('@header2 blah', sequence, '+', make_quality_scores(60))

        # Header: simple header
        # Barcodes: one mismatch on 3' end for barcode set #3
        sequence = 'TGGGT' + make_sequence(40) + 'TACC' + self.adapter
        self.entry3 = FastqEntry('@header3', sequence, '+', make_quality_scores(59))

        self.fq_fname = get_temp_file_name(extension='fq')
        self.fq_file = iCount.files.fastq.FastqFile(self.fq_fname, 'wt')
        for entry in (self.entry1, self.entry2, self.entry3):
            self.fq_file.write(entry)
        self.fq_file.close()
Ejemplo n.º 28
0
def _make_types_length(annotation, subtype='biotype', excluded_types=None):
    """
    Run function `make_types_length_file` with data from `annotation`.

    The annotation rows and a fixed three-row table ('1', '6', '20', each with
    '100') are written to temporary files. The first element returned by
    `make_types_length_file` is then read back as tab-separated rows.
    """
    annotation_path = make_file_from_list(annotation)
    output_path = get_temp_file_name()
    fai_rows = [
        ['1', '100'],
        ['6', '100'],
        ['20', '100'],
    ]
    fai_path = make_file_from_list(bedtool=False, data=fai_rows)
    written_file, _ = summary.make_types_length_file(
        annotation_path,
        fai_path,
        output_path,
        subtype=subtype,
        excluded_types=excluded_types,
    )
    return make_list_from_file(written_file, fields_separator='\t')
Ejemplo n.º 29
0
    def test_genome(self):
        """Run ``iCount genome`` from the command line; it must exit with 0."""
        # Download just MT and Y chromosome, or test can last too long...
        genome_out = get_temp_file_name(extension='fa.gz')
        command = [
            'iCount', 'genome', 'homo_sapiens',
            '--release', '84',
            '--out_dir', self.dir,
            '--genome', genome_out,
            '--chromosomes', 'MT', 'Y',
            '-S', '40',  # Suppress messages lower than ERROR.
        ]

        self.assertEqual(subprocess.call(command), 0)
Ejemplo n.º 30
0
    def test_genome(self):
        """Run ``iCount genome`` with the ensembl source; it must exit with 0."""
        # Download just MT and Y chromosome, or test can last too long...
        # Only test ENSEMBL, since GENCODE does not allow download of single chromosome.
        genome_out = get_temp_file_name(extension='fa.gz')
        command = [
            'iCount', 'genome', 'homo_sapiens', '84',
            '--source', 'ensembl',
            '--out_dir', self.dir,
            '--genome', genome_out,
            '--chromosomes', 'MT', 'Y',
            '-S', '40',  # Suppress messages lower than ERROR.
        ]

        self.assertEqual(subprocess.call(command), 0)