# --- Example 1 ---
    def _annotate_gtf(self, read_length):
        """Add a job that generates annotation FASTA files from the species GTF.

        :param read_length: read length used to derive the hashed index prefix
        """
        job = Job(name='annotate_gtf')
        job.invoke('all', self._state_update % 'Generating annotation FASTA files')

        index_prefix = self._get_index_hash(read_length)
        species_name = self._species.name

        # Input: the species annotation GTF
        gtf = File('%s.gtf' % species_name)

        # One FASTA per chromosome is consumed as input
        for chromosome in self._species.chromosomes:
            job.uses(File('%s/chr%s.fa' % (species_name, chromosome)), link=Link.INPUT)

        # Outputs, all written under the hashed annotation directory
        outputs = [
            File('h%s/FEATURES.fa' % index_prefix),
            File('h%s/GENOME.fa' % index_prefix),
            File('h%s/SPLICES.fa' % index_prefix),
            File('h%s/GENE.fa' % index_prefix),
        ]

        job.addArguments(gtf, '-c', species_name, '-p h%s/' % index_prefix, '-l %d' % read_length)

        job.uses(gtf, link=Link.INPUT)
        for out in outputs:
            job.uses(out, link=Link.OUTPUT, transfer=False, register=False)

        self.adag.addJob(job)
# --- Example 2 ---
    def _analyze(self):
        """Add a job that analyzes the mapped SAM file and emits count summaries."""
        job = Job(name='analyze_samfile')
        job.invoke('all', self._state_update % 'Analyzing SAM file')

        prefix = self._prefix

        # Input
        sam_file = File('%s.sam' % prefix)

        # Outputs: per-category count files plus a summary, all staged out
        outputs = [
            File('%s.gene.cnts' % prefix),
            File('%s.feature.cnts' % prefix),
            File('%s.ambiguousGenes.cnts' % prefix),
            File('%s.overlapGene.cnts' % prefix),
            File('%s.summary.out' % prefix),
        ]

        job.addArguments(sam_file, '--prefix', prefix)

        job.uses(sam_file, link=Link.INPUT)
        for out in outputs:
            job.uses(out, link=Link.OUTPUT, transfer=True, register=False)

        self.adag.addJob(job)
# --- Example 3 ---
    def _pre_filter_fastq(self, index, suffix_len):
        """Add a job that pre-filters one split of the reads file.

        :param index: zero-based index of the split to filter
        :param suffix_len: width of the numeric suffix on split file names
        """
        job = Job(name='pre_filter_fastq.py')
        job.invoke('all', self._state_update % 'Pre-filter reads file part %d' % (index + 1))
        prefix = 'reads%d' % index

        # Input: the index-th split produced by fastq-split (e.g. x00, x01, ...)
        reads = File(('x%0' + str(suffix_len) + 'd') % index)

        # Outputs
        full_fastq = File('%s_full.fastq' % prefix)
        reject = File('%s_reject.fastq' % prefix)
        stats = File('%s.stats' % prefix)

        # '-t' takes a comma-separated trim list; '0' stands in when the joined
        # list degenerates to a lone comma
        trims = ','.join(str(t) for t in self._trims)
        if trims == ',':
            trims = '0'

        job.addArguments(reads, '-r', '%d' % self._read_length, '-t', '%s' % trims)
        job.addArguments('-p', prefix)

        job.uses(reads, link=Link.INPUT)

        # One trimmed FASTQ per configured trim length
        for trim in self._trims:
            job.uses(File('%s_%d.fastq' % (prefix, trim)), link=Link.OUTPUT, transfer=False, register=False)

        job.uses(full_fastq, link=Link.OUTPUT, transfer=False, register=False)
        job.uses(reject, link=Link.OUTPUT, transfer=False, register=False)
        job.uses(stats, link=Link.OUTPUT, transfer=False, register=False)

        self.adag.addJob(job)
# --- Example 4 ---
    def _perm_index(self, index_type, read_length, read_format='fastq', seed='F2'):
        """Add a job that pre-computes a PerM index file.

        :param index_type: which FASTA to index (names the input file)
        :param read_length: read length the index is built for
        :param read_format: read format passed to perm via --readFormat
        :param seed: perm seed specifier (e.g. 'F2')
        :return: the created Job, so callers can add dependencies on it
        """
        job = Job(name='perm')
        job.invoke('all', self._state_update % 'Pre-computing %s index file' % index_type.capitalize())

        dir_hash = self._get_index_hash(read_length)

        # Input FASTA under the hashed annotation directory
        fa_input = File('h%s/%s.fa' % (dir_hash, index_type))

        # Output index, named from the seed-specific hash
        seed_hash = self._get_index_hash(read_length, seed)
        index = File('h%d_%s_%s_%s.index' % (seed_hash, index_type, seed, read_length))

        job.addArguments(fa_input, '%d' % read_length, '--readFormat', read_format, '--seed', seed)
        job.addArguments('-s', index)

        job.uses(fa_input, link=Link.INPUT)
        # The index is expensive to build, so stage it out and register it for reuse
        job.uses(index, link=Link.OUTPUT, transfer=True, register=True)

        self.adag.addJob(job)

        return job
# --- Example 5 ---
    def _bar_plot(self):
        """Add a job that plots the summary file."""
        job = Job(name='bar_plot')
        job.invoke('all', self._state_update % 'Plot summary out file')

        # Input
        summary_file = File('%s.summary.out' % self._prefix)

        # Output -- note the '.ps' (PostScript) extension
        plot_file = File('%s.ps' % self._prefix)

        job.addArguments('--output-file', plot_file, summary_file)

        job.uses(summary_file, link=Link.INPUT)
        job.uses(plot_file, link=Link.OUTPUT, transfer=True, register=False)

        self.adag.addJob(job)
# --- Example 6 ---
    def _farish_compact(self):
        """Add a job that compacts the unmapped-reads FASTQ file."""
        job = Job(name='farish_compact')
        job.invoke('all', self._state_update % 'Farish Compact')

        unmapped_reads = File('%s.unmapped.fastq' % self._prefix)  # input
        compact_out = File('%s.compact' % self._prefix)            # output

        job.addArguments(unmapped_reads, '-o', compact_out)

        job.uses(unmapped_reads, link=Link.INPUT)
        job.uses(compact_out, link=Link.OUTPUT, transfer=True, register=False)

        self.adag.addJob(job)
# --- Example 7 ---
    def _clipr(self, clip_to, reads, tag):
        """Add a clipR job that maps clipped reads to generate new splice candidates.

        :param clip_to: clipping target name; selects the FASTA and names outputs
        :param reads: printf-style template for the per-part read file names
        :param tag: name prefix shared by the reads-list and log files
        """
        anchor = self._compute_clip_seed(self._read_length)

        clip_reads = Job(name='clipR')
        clip_reads.invoke('all', self._state_update % 'Generate new splice candidates')

        seed = 'F%s' % self._clip_seed
        mismatches = self._clip_mismatches

        # Input files
        prefix = self._get_index_hash(self._read_length)
        fa = File('h%s/%s.fa' % (prefix, clip_to.upper()))
        reads_txt = File('%s_%s_reads.txt' % (tag, clip_to.lower()))

        for i in self._range():
            # Per-part input
            reads_i = File(reads % i)

            # Per-part outputs: a SAM mapping plus the still-unmapped reads
            file_type = 'sam'
            path, file_name, ext = GTFAR._get_filename_parts(reads_i.name)
            # Wrapped in File() for consistency with every other uses() call in
            # this module (previously a bare string was passed to uses()).
            sam_mapping = File('%s_A_%d_%d_%d_%s.%s' % (clip_to.upper(), self._clip_seed, mismatches, anchor, file_name, file_type))
            fastq_out = File('%s_miss_%s%s' % (file_name, clip_to, ext))

            # Uses
            clip_reads.uses(reads_i, link=Link.INPUT)
            clip_reads.uses(fastq_out, link=Link.OUTPUT, transfer=False, register=False)
            clip_reads.uses(sam_mapping, link=Link.OUTPUT, transfer=False, register=False)

        # Output files
        log = File('%s_%s.log' % (tag, clip_to.lower()))

        # Arguments
        clip_reads.addArguments(fa, reads_txt, '--seed %s' % seed, '--anchorL %d' % anchor, '-e', '-v %d' % mismatches)
        clip_reads.addArguments('-s', '-u', '--noSamHeader', '--ignoreDummyR %d' % 40, '--ignoreRepeatR %d' % 15)

        clip_reads.setStdout(log)

        # Uses
        clip_reads.uses(fa, link=Link.INPUT)
        clip_reads.uses(reads_txt, link=Link.INPUT)
        clip_reads.uses(log, link=Link.OUTPUT, transfer=False, register=False)

        self.adag.addJob(clip_reads)
# --- Example 8 ---
    def _perm(self, index_type, map_to, reads, tag, output_sam=False):
        """Add a PerM job that maps read parts against a pre-built index.

        :param index_type: kept for callers; not referenced in this body
        :param map_to: mapping target name used in index/log/output file names
        :param reads: printf-style template for the per-part read file names
        :param tag: name prefix shared by the reads-list and log files
        :param output_sam: emit '.sam' output instead of '.mapping'
        """
        perm = Job(name='perm')
        perm.invoke('all', self._state_update % 'Map reads to %s' % map_to.capitalize())

        # Input files
        hash_v = self._get_index_hash(self._read_length, 'F%d' % self._seed)
        index = File('h%d_%s_F%d_%d.index' % (hash_v, map_to, self._seed, self._read_length))
        reads_txt = File('%s_%s_reads.txt' % (tag, map_to.lower()))

        for i in self._range():
            # Per-part input
            reads_i = File(reads % i)

            # Per-part outputs
            file_type = 'sam' if output_sam else 'mapping'
            path, file_name, ext = GTFAR._get_filename_parts(reads_i.name)
            # Wrapped in File() for consistency with every other uses() call in
            # this module (previously a bare string was passed to uses()).
            sam_mapping = File('%s_B_%d_%d_%s.%s' % (map_to.upper(), self._seed, self._mismatches, file_name, file_type))
            fastq_out = File('%s_miss_%s%s' % (file_name, map_to, ext))

            # Uses
            perm.uses(reads_i, link=Link.INPUT)
            perm.uses(fastq_out, link=Link.OUTPUT, transfer=False, register=False)
            perm.uses(sam_mapping, link=Link.OUTPUT, transfer=False, register=False)

        # Output files
        # NOTE(review): the log name uses map_to.upper() here while _clipr uses
        # lower() -- confirm the asymmetry is intentional.
        log = File('%s_%s.log' % (tag, map_to.upper()))

        # Arguments
        perm.addArguments(index, reads_txt, '--seed F%d' % self._seed, '-v %d' % self._mismatches, '-B', '--printNM')
        perm.addArguments('-u', '-s', '-T %d' % self._read_length)

        if output_sam:
            perm.addArguments('--noSamHeader', '--outputFormat', 'sam')

        perm.setStdout(log)

        # Uses
        perm.uses(index, link=Link.INPUT)
        perm.uses(reads_txt, link=Link.INPUT)
        perm.uses(log, link=Link.OUTPUT, transfer=False, register=False)

        self.adag.addJob(perm)
# --- Example 9 ---
    def _fastq_split(self, splits=2, suffix_len=2):
        """Add a job that splits the input reads file into several parts.

        :param splits: number of parts to produce
        :param suffix_len: width of the numeric suffix on the part file names
        """
        job = Job(name='fastq-split')
        job.invoke('all', self._state_update % 'Splitting input reads file into %d parts' % splits)

        # Input
        reads = File(self._reads)

        job.addArguments(reads, '%d' % splits)

        job.uses(reads, link=Link.INPUT)

        # Output parts are named x00, x01, ... (zero-padded to suffix_len)
        name_template = 'x%0' + str(suffix_len) + 'd'
        for part in range(splits):
            job.uses(File(name_template % part), link=Link.OUTPUT, transfer=False, register=False)

        self.adag.addJob(job)
# --- Example 10 ---
    def _transcript_prediction(self):
        """Add a job that predicts transcripts from feature counts and splice candidates."""
        job = Job(name='transcript_prediction')
        job.invoke('all', self._state_update % 'Transcript Prediction')

        # Inputs
        feature_counts = File('%s.feature.cnts' % self._prefix)
        candidates_gtf = File('%s.splice_candidates.gtf' % self._prefix)

        # Output: predictions are captured from the job's stdout
        transcript_counts = File('%s.transcripts.cnts' % self._prefix)

        job.addArguments(feature_counts, '-g', candidates_gtf)

        job.setStdout(transcript_counts)
        job.uses(feature_counts, link=Link.INPUT)
        job.uses(candidates_gtf, link=Link.INPUT)
        job.uses(transcript_counts, link=Link.OUTPUT, transfer=True, register=False)

        self.adag.addJob(job)
# --- Example 11 ---
    def _parse_clipped_alignment(self, input_file):
        """Add a job that parses one clipped-alignment file into an .info file.

        :param input_file: name of the clipped-alignment file to parse
        """
        job = Job(name='parse_clipped_alignment')
        job.invoke('all', self._state_update % 'Parse clipped alignment')

        # Input (distinct local name; the original shadowed the parameter)
        alignment = File(input_file)

        # Output, captured from stdout; recorded so _merge_info can find it later
        info = File('%s.info' % alignment.name)
        self._info_files.append(info.name)

        job.addArguments(alignment)
        job.setStdout(info)

        job.uses(alignment, link=Link.INPUT)
        job.uses(info, link=Link.OUTPUT, transfer=False, register=False)

        self.adag.addJob(job)
# --- Example 12 ---
    def _merge_stats(self):
        """Add a job that merges the per-split adaptor stats files into one."""
        job = Job(name='merge-stats')
        job.invoke('all', self._state_update % 'Merging adaptor stats file')

        # Output
        adaptor_stats = File('%s.adaptor.stats' % self._prefix)

        # 'reads*.stats' is passed verbatim; presumably expanded by the merge
        # tool or shell at run time -- confirm against the executable.
        job.addArguments('reads*.stats', adaptor_stats)

        # Inputs: one stats file per split
        for split in range(self._splits):
            job.uses(File('reads%d.stats' % split), link=Link.INPUT)

        job.uses(adaptor_stats, link=Link.OUTPUT, transfer=True, register=False)

        self.adag.addJob(job)
# --- Example 13 ---
    def _parse_alignment(self, input_file, tag):
        """Add a job that parses one alignment file into a .vis file.

        :param input_file: name of the alignment file to parse
        :param tag: value forwarded to the parser's --tag option
        """
        job = Job(name='parse_alignment')
        job.invoke('all', self._state_update % 'Parse alignment')

        # Input (distinct local name; the original shadowed the parameter)
        alignment = File(input_file)

        # Output, captured from stdout; recorded for later merge steps
        vis = File('%s.vis' % alignment.name)
        self._vis_files.append(vis.name)

        job.addArguments(alignment, '--strandRule', self._strand_rule, '--tag', tag)
        job.setStdout(vis)

        job.uses(alignment, link=Link.INPUT)
        job.uses(vis, link=Link.OUTPUT, transfer=False, register=False)

        self.adag.addJob(job)
# --- Example 14 ---
    def _merge_info(self, info_files, gtf_file):
        """Add a job that merges .info files into a single GTF file.

        :param info_files: iterable of .info file names to merge
        :param gtf_file: name of the GTF file to produce
        """
        job = Job(name='merge-info')
        job.invoke('all', self._state_update % 'Merging info files to generate GTF file')

        # Output (distinct local name; the original rebound the parameter)
        merged_gtf = File(gtf_file)

        # Each info file is both a command-line argument and a declared input
        for name in info_files:
            info = File(name)
            job.addArguments(info)
            job.uses(info, link=Link.INPUT)

        # The output file name is the final argument
        job.addArguments(merged_gtf)

        job.uses(merged_gtf, link=Link.OUTPUT, transfer=True, register=False)

        self.adag.addJob(job)