Example #1
0
    def clean_reads(self, dataPath, name, sampleType):
        """Trim adapters from the extracted reads and store the cleaned reads.

        Cutadapt is run on the extracted reads to trim adapter sequences so
        they do not add 'noise' to the assembly process. The cleaned output
        from cutadapt is then reprocessed to determine whether the softclipped
        sequences were trimmed off, which filters the reads further.

        The reads whose softclipped sequences remain are stored and a new
        fastq file is written.

        Args:
            dataPath (str):    The path to the data files for this target.
            name (str):        The target name.
            sampleType (str):  A string indicating a tumor ('sv') or normal
                               ('norm') sample being processed.
        Return:
            check (boolean):   A boolean to indicate whether there are any
                               reads left after cleaning is complete.
        """

        cutadaptBin = self.params.get_param('cutadapt')  # Cutadapt binary
        configFn = self.params.get_param('cutadapt_config_file')
        startMsg = 'Cleaning reads using %s with configuration file %s' % (
            cutadaptBin, configFn)
        utils.log(self.loggingName, 'info', startMsg)

        # Key under which the cleaned fastq path is tracked in self.files.
        cleanedKey = '%s_cleaned_fq' % sampleType
        cleanedBasename = name + '_%s_reads_cleaned.fastq' % sampleType
        self.files[cleanedKey] = os.path.join(dataPath, cleanedBasename)
        utils.log(self.loggingName, 'info',
                  'Writing clean reads to %s' % self.files[cleanedKey])

        # The captured cutadapt output is not used further in this method.
        _cutadaptOut, _cutadaptErr = utils.run_cutadapt(
            cutadaptBin,
            configFn,
            self.files['%s_fq' % sampleType],
            self.files[cleanedKey],
            self.loggingName)

        self.setup_cleaned_reads(sampleType)

        # Re-read the cleaned fastq against the extracted reads; the helper
        # returns the (possibly new) fastq path and the retained read records.
        trimmedFq = self.files[cleanedKey]
        extractedReads = self.get_sv_reads(sampleType)
        filteredFq, readRecs = utils.get_fastq_reads(trimmedFq, extractedReads)
        self.files[cleanedKey] = filteredFq
        self.cleaned_read_recs[sampleType] = readRecs

        self.clear_sv_reads(sampleType)
        check = self.continue_analysis_check(sampleType)
        utils.log(self.loggingName, 'info', 'Clean reads exist %s' % check)
        return check
Example #2
0
    def clean_reads(self, sample_type):
        """Trim adapters from the extracted reads and keep what remains.

        Cutadapt is run through the current Python interpreter on
        ``self.files['<sample_type>_fq']`` and its standard output is written
        to ``<data path>/<name>_<sample_type>_reads_cleaned.fastq``. That file
        is then passed, together with ``self.sv_reads[sample_type]``, to
        ``utils.get_fastq_reads``; the fastq path and read records it returns
        are stored in ``self.files`` and ``self.cleaned_read_recs``. Finally
        ``self.sv_reads[sample_type]`` is reset to ``None``.

        Args:
            sample_type (str): Sample label used to build the ``self.files``
                               keys and to index ``self.sv_reads`` and
                               ``self.cleaned_read_recs``.
        Return:
            check (boolean):   True when at least one cleaned read record is
                               left after filtering, False otherwise.
        """

        # Local import: only needed here to tokenise the cutadapt options.
        import shlex

        # Run cleaning program
        cutadapt = self.params.get_param('cutadapt')
        cutadapt_config = self.params.get_param('cutadapt_config_file')
        utils.log(self.logging_name, 'info', 'Cleaning reads using %s with configuration file %s' % (cutadapt, cutadapt_config))

        raw_fq_key = '%s_fq' % sample_type
        cleaned_fq_key = '%s_cleaned_fq' % sample_type
        self.files[cleaned_fq_key] = os.path.join(self.paths['data'], self.name + "_%s_reads_cleaned.fastq" % sample_type)

        utils.log(self.logging_name, 'info', 'Writing clean reads to %s' % self.files[cleaned_fq_key])
        cutadapt_parameters = utils.stringify(cutadapt_config)

        # Build an argument vector instead of a shell string so that paths
        # containing whitespace or shell metacharacters are passed verbatim.
        # The options are formatted with '%s' (as the original command string
        # did) and split with shell-like quoting rules.
        cutadapt_cmd = [sys.executable, cutadapt]
        cutadapt_cmd.extend(shlex.split('%s' % cutadapt_parameters))
        cutadapt_cmd.append(self.files[raw_fq_key])
        utils.log(self.logging_name, 'debug', 'Cutadapt system command %s > %s' % (' '.join(cutadapt_cmd), self.files[cleaned_fq_key]))

        # Cutadapt writes the trimmed reads to stdout; send them straight to
        # the cleaned fastq file rather than relying on a shell redirect.
        with open(self.files[cleaned_fq_key], 'wb') as cleaned_fq:
            cutadapt_proc = subprocess.Popen(cutadapt_cmd, stdout=cleaned_fq, stderr=subprocess.PIPE)
            _, errors = cutadapt_proc.communicate()
        utils.log(self.logging_name, 'debug', 'Clean reads exit status %s' % cutadapt_proc.returncode)
        utils.log(self.logging_name, 'debug', 'Clean reads errors %s' % errors)

        # Use these for pulling out reads after finding sample-only kmers.
        # Filter the cleaned reads to make sure soft clips were not adapters, re-write fastq
        if not self.cleaned_read_recs:
            self.cleaned_read_recs = {}
        self.cleaned_read_recs[sample_type] = None
        self.files[cleaned_fq_key], self.cleaned_read_recs[sample_type] = utils.get_fastq_reads(self.files[cleaned_fq_key], self.sv_reads[sample_type])
        # The extracted reads are no longer needed once the cleaned records exist.
        self.sv_reads[sample_type] = None
        check = len(self.cleaned_read_recs[sample_type]) > 0

        utils.log(self.logging_name, 'info', 'Check there are cleaned reads %r' % check)
        return check
Example #3
0
    def clean_reads(self, dataPath, name, sampleType):
        """Clean the extracted reads with cutadapt and record the result.

        Adapter sequences are trimmed from the sequence reads with cutadapt to
        remove 'noise' from the assembly process. The cleaned reads are then
        reprocessed to determine whether the softclipped sequences were
        trimmed off, which filters the reads further.

        The softclipped sequences that remain are stored and a new fastq file
        is written.

        Args:
            dataPath (str):    The path to the data files for this target.
            name (str):        The target name.
            sampleType (str):  A string indicating a tumor ('sv') or normal
                               ('norm') sample being processed.
        Return:
            check (boolean):   A boolean to indicate whether there are any
                               reads left after cleaning is complete.
        """

        cutadaptPath = self.params.get_param('cutadapt')  # Cutadapt binary
        cutadaptCfg = self.params.get_param('cutadapt_config_file')
        utils.log(
            self.loggingName,
            'info',
            'Cleaning reads using %s with configuration file %s' % (cutadaptPath, cutadaptCfg),
        )

        # self.files key for the cleaned fastq of this sample type.
        outKey = '%s_cleaned_fq' % sampleType
        self.files[outKey] = os.path.join(
            dataPath,
            name + '_%s_reads_cleaned.fastq' % sampleType,
        )
        utils.log(
            self.loggingName,
            'info',
            'Writing clean reads to %s' % self.files[outKey],
        )

        # Run cutadapt; its captured output is not inspected here.
        _out, _err = utils.run_cutadapt(
            cutadaptPath,
            cutadaptCfg,
            self.files['%s_fq' % sampleType],
            self.files[outKey],
            self.loggingName,
        )

        self.setup_cleaned_reads(sampleType)

        # Filter the cleaned fastq against the extracted reads, then store
        # the returned fastq path and read records.
        cleanedFq = self.files[outKey]
        svReads = self.get_sv_reads(sampleType)
        newFq, cleanedRecs = utils.get_fastq_reads(cleanedFq, svReads)
        self.files[outKey] = newFq
        self.cleaned_read_recs[sampleType] = cleanedRecs
        self.clear_sv_reads(sampleType)

        check = self.continue_analysis_check(sampleType)
        utils.log(self.loggingName, 'info', 'Clean reads exist %s' % check)
        return check
Example #4
0
    def clean_reads(self, sample_type):
        """Trim adapters from the extracted reads and keep what remains.

        Cutadapt is run through the current Python interpreter on
        ``self.files['<sample_type>_fq']`` and its standard output is written
        to ``<data path>/<name>_<sample_type>_reads_cleaned.fastq``. That file
        is then passed, together with ``self.sv_reads[sample_type]``, to
        ``utils.get_fastq_reads``; the fastq path and read records it returns
        are stored in ``self.files`` and ``self.cleaned_read_recs``. Finally
        ``self.sv_reads[sample_type]`` is reset to ``None``.

        Args:
            sample_type (str): Sample label used to build the ``self.files``
                               keys and to index ``self.sv_reads`` and
                               ``self.cleaned_read_recs``.
        Return:
            check (boolean):   True when at least one cleaned read record is
                               left after filtering, False otherwise.
        """

        # Local import: only needed here to tokenise the cutadapt options.
        import shlex

        # Run cleaning program
        cutadapt = self.params.get_param('cutadapt')
        cutadapt_config = self.params.get_param('cutadapt_config_file')
        utils.log(self.logging_name, 'info', 'Cleaning reads using %s with configuration file %s' % (cutadapt, cutadapt_config))

        raw_fq_key = '%s_fq' % sample_type
        cleaned_fq_key = '%s_cleaned_fq' % sample_type
        self.files[cleaned_fq_key] = os.path.join(self.paths['data'], self.name + "_%s_reads_cleaned.fastq" % sample_type)

        utils.log(self.logging_name, 'info', 'Writing clean reads to %s' % self.files[cleaned_fq_key])
        cutadapt_parameters = utils.stringify(cutadapt_config)

        # Build an argument vector instead of a shell string so that paths
        # containing whitespace or shell metacharacters are passed verbatim.
        # The options are formatted with '%s' (as the original command string
        # did) and split with shell-like quoting rules.
        cutadapt_cmd = [sys.executable, cutadapt]
        cutadapt_cmd.extend(shlex.split('%s' % cutadapt_parameters))
        cutadapt_cmd.append(self.files[raw_fq_key])
        utils.log(self.logging_name, 'debug', 'Cutadapt system command %s > %s' % (' '.join(cutadapt_cmd), self.files[cleaned_fq_key]))

        # Cutadapt writes the trimmed reads to stdout; send them straight to
        # the cleaned fastq file rather than relying on a shell redirect.
        with open(self.files[cleaned_fq_key], 'wb') as cleaned_fq:
            cutadapt_proc = subprocess.Popen(cutadapt_cmd, stdout=cleaned_fq, stderr=subprocess.PIPE)
            _, errors = cutadapt_proc.communicate()
        utils.log(self.logging_name, 'debug', 'Clean reads exit status %s' % cutadapt_proc.returncode)
        utils.log(self.logging_name, 'debug', 'Clean reads errors %s' % errors)

        # Use these for pulling out reads after finding sample-only kmers.
        # Filter the cleaned reads to make sure soft clips were not adapters, re-write fastq
        if not self.cleaned_read_recs:
            self.cleaned_read_recs = {}
        self.cleaned_read_recs[sample_type] = None
        self.files[cleaned_fq_key], self.cleaned_read_recs[sample_type] = utils.get_fastq_reads(self.files[cleaned_fq_key], self.sv_reads[sample_type])
        # The extracted reads are no longer needed once the cleaned records exist.
        self.sv_reads[sample_type] = None
        check = len(self.cleaned_read_recs[sample_type]) > 0

        utils.log(self.logging_name, 'info', 'Check there are cleaned reads %r' % check)
        return check