Beispiel #1
0
 def extract_bam_unmap(self):
     """
     Build, for every sample, four jobs that each filter one category of reads
     out of the sample's soft-clip BAM files and then sort the filtered BAM.

     Inputs are read from `sclip/<sample>/<sample>.scOthers.bam` and
     `sclip/<sample>/<sample>.sc.bam`; outputs are written under
     `extract/<sample>/`. Every job starts by creating that output directory
     when it does not exist yet.

     Read categories (flag meanings decoded from the SAM flag bits, assuming
     the option string is forwarded unchanged to `samtools view` -- confirm
     against the `samtools.view` wrapper):

     * ORPHAN     `-f 12 -F 256`  : read and mate both unmapped, not secondary
     * OEAUNMAP.1 `-f 68 -F 264`  : unmapped first-in-pair read whose mate is mapped, not secondary
     * OEAUNMAP.2 `-f 132 -F 264` : unmapped second-in-pair read whose mate is mapped, not secondary
     * OEAMAP     `-f 8 -F 1284`  : mapped read whose mate is unmapped, not secondary, not duplicate

     Returns the list of jobs, four per sample, in the order
     ORPHAN, OEAUNMAP1, OEAUNMAP2, OEAMAP.
     """
     # (output label, source BAM suffix under sclip/, samtools view options)
     extractions = [
         ("ORPHAN", "scOthers.bam", "-b -h -f 12 -F 256"),
         ("OEAUNMAP.1", "sc.bam", "-b -h -f 68 -F 264"),
         ("OEAUNMAP.2", "sc.bam", "-b -h -f 132 -F 264"),
         ("OEAMAP", "sc.bam", "-b -h -f 8 -F 1284"),
     ]

     jobs = []

     for sample in self.samples:
         sclip_file_prefix = os.path.join("sclip", sample.name, sample.name + ".")
         extract_directory = os.path.join("extract", sample.name)
         extract_file_prefix = os.path.join("extract", sample.name, sample.name + ".")

         # One mkdir job object per sample, shared by the four extraction jobs.
         mkdir_job = Job(command="if [ ! -d " + extract_directory + " ]; then mkdir -p " + extract_directory + "; fi")

         for label, source_suffix, view_options in extractions:
             extracted_bam = extract_file_prefix + label + ".bam"
             # Job names carry the label without its dot (e.g. "OEAUNMAP1").
             job_name = "extract_bam_" + label.replace(".", "") + "_" + sample.name

             jobs.append(concat_jobs([
                 mkdir_job,
                 concat_jobs([
                     samtools.view(sclip_file_prefix + source_suffix, extracted_bam, view_options),
                     # Third positional argument is presumably `sort_by_name` (output prefix ends in ".sName").
                     samtools.sort(extracted_bam, extract_file_prefix + label + ".sName", True)
                 ])
             ], name=job_name))

     return jobs
Beispiel #2
0
    def samtools_bam_sort(self):
        """
        Sorts bam by readname prior to picard_sam_to_fastq step in order to minimize memory consumption.
        If bam file is small and the memory requirements are reasonable, this step can be skipped.

        Readsets that already provide a FASTQ file (`fastq1`) are skipped.
        For every other readset one `samtools.sort` job is created whose output
        prefix is the readset BAM path with its trailing `.bam` replaced by
        `.sorted`; the resulting `<prefix>.bam` is registered as removable.

        Returns the list of created jobs.
        Raises `Exception` when a readset has neither `fastq1` nor `bam`.
        """

        jobs = []
        for readset in self.readsets:
            # If readset FASTQ files are available, skip this step
            if readset.fastq1:
                continue

            if not readset.bam:
                raise Exception(
                    "Error: BAM file not available for readset \"" +
                    readset.name + "\"!")

            # Raw string: "\." in a normal literal is an invalid escape sequence.
            sorted_bam_prefix = re.sub(r"\.bam$", ".sorted", readset.bam.strip())

            # The sort input is the BAM path as stored on the readset (not stripped).
            job = samtools.sort(readset.bam, sorted_bam_prefix, sort_by_name=True)
            job.name = "samtools_bam_sort." + readset.name
            job.removable_files = [sorted_bam_prefix + ".bam"]
            job.samples = [readset.sample]
            jobs.append(job)
        return jobs
Beispiel #3
0
    def create_hic_file(self):
        """
        A .hic file is created per sample in order to visualize in JuiceBox, WashU epigenome browser or as input for other tools.
        For more detailed information about the JuiceBox visit: [JuiceBox] (http://www.aidenlab.org/software.html)

        For each sample one concatenated job is built that creates the .hic
        output directory, sorts `<bams_output_directory>/<sample>/<sample>.merged.bam`
        (with `sort_by_name=True`), and passes the sorted BAM to
        `hic.create_input` and then `hic.create_hic`, which writes
        `<hicfiles_output_directory>/<sample>.hic`.

        Returns the list of jobs, one per sample.
        """

        jobs = []

        for sample in self.samples:
            merged_bam = os.path.join(self.output_dirs['bams_output_directory'], sample.name, sample.name + ".merged.bam")
            # Escaped, anchored raw pattern: only the trailing ".merged.bam" is
            # rewritten, never a look-alike inside a directory component.
            sorted_bam_prefix = re.sub(r"\.merged\.bam$", ".merged.sorted", merged_bam.strip())
            sorted_bam = sorted_bam_prefix + ".bam"

            hic_directory = self.output_dirs['hicfiles_output_directory']
            hic_output = os.path.join(hic_directory, sample.name + ".hic")
            # Presumably the file produced by hic.create_input in the working
            # directory -- confirm against the hic module.
            juicebox_input = sample.name + ".juicebox.input.sorted"

            job = concat_jobs([
                Job(command="mkdir -p " + hic_directory),
                samtools.sort(merged_bam, sorted_bam_prefix, sort_by_name=True),
                hic.create_input(sorted_bam, sample.name),
                hic.create_hic(juicebox_input, hic_output, self.genome)
            ])
            job.name = "create_hic_file." + sample.name
            job.samples = [sample]

            jobs.append(job)

        return jobs
Beispiel #4
0
    def create_hic_file(self):
        """
        A .hic file is created per sample in order to visualize in JuiceBox, WashU epigenome browser or as input for other tools.
        For more detailed information about the JuiceBox visit: [JuiceBox] (http://www.aidenlab.org/software.html)

        For each sample three jobs are concatenated into one:

        1. `samtools.sort` of `<bams_output_directory>/<sample>/<sample>.merged.bam`
           with `sort_by_name=True`.
        2. `CreateHicFileInput.sh`, declared to turn the sorted BAM into
           `<sample>.juicebox.input` and `<sample>.juicebox.input.sorted`.
        3. The jar configured as `create_hic_file/JuicerPath`, run with `pre`,
           declared to write `<hicfiles_output_directory>/<sample>.hic`.

        Returns the list of jobs, one per sample.
        """

        jobs = []

        for sample in self.samples:
            merged_bam = os.path.join(
                self.output_dirs['bams_output_directory'], sample.name,
                sample.name + ".merged.bam")
            # Escaped, anchored raw pattern: only the trailing ".merged.bam" is
            # rewritten, never a look-alike inside a directory component.
            sorted_bam_prefix = re.sub(r"\.merged\.bam$", ".merged.sorted",
                                       merged_bam.strip())
            sorted_bam = sorted_bam_prefix + ".bam"

            hic_directory = self.output_dirs['hicfiles_output_directory']
            hic_output = os.path.join(hic_directory, sample.name + ".hic")

            juicebox_input = sample.name + ".juicebox.input"
            juicebox_input_sorted = sample.name + ".juicebox.input.sorted"
            job_name = "create_hic_file." + sample.name

            sort_job = samtools.sort(merged_bam,
                                     sorted_bam_prefix,
                                     sort_by_name=True)

            input_job = Job(
                input_files=[sorted_bam],
                output_files=[juicebox_input, juicebox_input_sorted],
                module_entries=[["create_hic_file", "module_mugqic_tools"]],
                name=job_name,
                # "$(pwd)" is deliberately left literal so the shell resolves it
                # when the job runs; os.path.expandvars never expanded it.
                command="bash CreateHicFileInput.sh {sortedBam} {name} $(pwd)"
                .format(sortedBam=sorted_bam, name=sample.name),
                removable_files=[
                    juicebox_input, juicebox_input_sorted, sorted_bam
                ])

            juicebox_job = Job(
                input_files=[juicebox_input_sorted],
                output_files=[hic_output],
                module_entries=[["create_hic_file", "module_java"]],
                name=job_name,
                command=
                "mkdir -p {hic_output} && java -jar {juicer} pre -q {q} {name} {output} {assembly}"
                .format(
                    hic_output=hic_directory,
                    # Environment variables in the configured jar path are
                    # expanded when the job is built.
                    juicer=os.path.expandvars(
                        config.param('create_hic_file', 'JuicerPath')),
                    q=config.param('create_hic_file', 'q'),
                    name=juicebox_input_sorted,
                    output=hic_output,
                    assembly=self.genome))

            job = concat_jobs([sort_job, input_job, juicebox_job])
            job.name = job_name

            jobs.append(job)
        return jobs