コード例 #1
0
ファイル: FileExporter.py プロジェクト: iontorrent/TS
    def createFastq(self):
        """
        Convert every barcode's BAM file to FASTQ and link it for download.

        For each entry of ``self.json_barcodes`` whose ``bam_filepath`` exists
        on disk, the command returned by ``blockprocessing.bam2fastq_command``
        is run through the shell to write ``<applied barcode name>.fastq`` into
        ``self.pluginDir``. If that file exists afterwards, a symlink to it is
        (re)created in ``self.downloadDir``.
        :return: None
        """
        for barcode, barcode_info in self.json_barcodes.items():
            bam_path = barcode_info['bam_filepath']

            # nothing to convert for barcodes without a BAM file on disk
            if not os.path.exists(bam_path):
                continue

            # the fastq is written into the plugin directory
            fastq_basename = self.applyBarcodeName(barcode) + '.fastq'
            fastq_path = os.path.join(self.pluginDir, fastq_basename)

            # run the conversion; the exit status is not inspected, success is
            # judged below by whether the fastq file exists
            subprocess.call(
                blockprocessing.bam2fastq_command(bam_path, fastq_path),
                shell=True)

            # expose the fastq in the downloads folder through a symlink
            link_path = os.path.join(self.downloadDir, fastq_basename)
            if not os.path.exists(fastq_path):
                continue

            # lexists() is also true for a dangling link, which must be
            # removed before the new link can be created
            if os.path.lexists(link_path):
                os.remove(link_path)
            os.symlink(fastq_path, link_path)
コード例 #2
0
ファイル: FileExporter.py プロジェクト: tw7649116/TS
    def createFastq(self):
        """
        Convert every barcode's BAM file to FASTQ and link it for download.

        For each entry of ``self.json_barcodes`` whose ``bam_filepath`` exists
        on disk, the command returned by ``blockprocessing.bam2fastq_command``
        is run through the shell to write ``<applied barcode name>.fastq`` into
        ``self.pluginDir``. If that file exists afterwards, a symlink to it is
        (re)created in ``self.downloadDir``.
        :return: None
        """
        for barcode, barcode_info in self.json_barcodes.items():
            bam_path = barcode_info['bam_filepath']

            # nothing to convert for barcodes without a BAM file on disk
            if not os.path.exists(bam_path):
                continue

            # the fastq is written into the plugin directory
            fastq_basename = self.applyBarcodeName(barcode) + '.fastq'
            fastq_path = os.path.join(self.pluginDir, fastq_basename)

            # run the conversion; the exit status is not inspected, success is
            # judged below by whether the fastq file exists
            subprocess.call(
                blockprocessing.bam2fastq_command(bam_path, fastq_path),
                shell=True)

            # expose the fastq in the downloads folder through a symlink
            link_path = os.path.join(self.downloadDir, fastq_basename)
            if not os.path.exists(fastq_path):
                continue

            # lexists() is also true for a dangling link, which must be
            # removed before the new link can be created
            if os.path.lexists(link_path):
                os.remove(link_path)
            os.symlink(fastq_path, link_path)
コード例 #3
0
ファイル: FastqCreator.py プロジェクト: aidjek/TS
 def bam2fastq_picard(self, bam_filename_list, fastq_filename):
     """Convert the first BAM file of a list to FASTQ on a best-effort basis.

     Only ``bam_filename_list[0]`` is converted; any further entries are
     ignored. The command returned by ``blockprocessing.bam2fastq_command``
     is run through the shell and its exit status is not inspected.

     :param bam_filename_list: sequence of BAM paths; only the first is used
     :param fastq_filename: path of the FASTQ file to produce
     :return: None. Failures (including an empty list) are reported with a
         printed traceback instead of being raised.
     """
     try:
         com = blockprocessing.bam2fastq_command(bam_filename_list[0],
                                                 fastq_filename)
         subprocess.call(com, shell=True)
     except Exception:
         # Stay best-effort for ordinary errors, but do not swallow
         # KeyboardInterrupt / SystemExit the way a bare ``except:`` would.
         traceback.print_exc()
コード例 #4
0
 def bam2fastq_picard(self, bam_filename_list, fastq_filename):
     """Convert the first BAM file of a list to FASTQ on a best-effort basis.

     Only ``bam_filename_list[0]`` is converted; any further entries are
     ignored. The command returned by ``blockprocessing.bam2fastq_command``
     is run through the shell and its exit status is not inspected.

     :param bam_filename_list: sequence of BAM paths; only the first is used
     :param fastq_filename: path of the FASTQ file to produce
     :return: None. Failures (including an empty list) are reported with a
         printed traceback instead of being raised.
     """
     try:
         com = blockprocessing.bam2fastq_command(bam_filename_list[0], fastq_filename)
         subprocess.call(com, shell=True)
     except Exception:
         # Stay best-effort for ordinary errors, but do not swallow
         # KeyboardInterrupt / SystemExit the way a bare ``except:`` would.
         traceback.print_exc()
コード例 #5
0
    def launch(self):
        """Export per-sample FASTA, VCF and FASTQ files plus merged pangolin results.

        Steps, in order:

        1. Load ``barcodes.json`` from the working directory into
           ``self.barcodes``.
        2. Collect the barcode/sample-name pairs listed under the plan's
           ``barcodedSamples`` and set aside every sample whose name has no
           entry in ``pluginconfig['input_metadata']``.
        3. Look up the pangolin plugin output, set aside samples whose
           ``status``/``passes`` fields are not ``passed_qc``/``Passed`` and
           read the consensus FASTA sequences.
        4. Look up the variant caller plugin output and its VCF paths.
        5. For every remaining sample write ``<sample>.fa``,
           ``<sample>.vcf.gz`` and ``<sample>.fastq.gz`` below
           ``config.root_dump_path/<results_name>``. A sample for which any of
           these steps raises is logged and moved to the transfer-failed
           collection.
        6. Write ``pangolin.csv`` (merged per-barcode lineage reports) and
           ``metadata.json`` to the same directory.
        7. Render a summary of transferred and rejected samples.

        :return: ``False`` when no sample could be linked to the supplied
            metadata (an error message is rendered instead), ``True``
            otherwise.
        """
        with open('barcodes.json', 'r') as fh:
            # NOTE: Not available in memory like startplugin
            self.barcodes = json.load(fh)

        root_report_path = self.startplugin['runinfo']['report_root_dir']

        run_name = self.startplugin['expmeta']['run_name']
        result_name = self.startplugin['expmeta']['results_name']

        # Add barcode information and store in collection
        covid_samples = SampleCollection()
        barcoded_samples = self.startplugin['plan']['barcodedSamples']
        for sample_name, barcode_info in barcoded_samples.items():
            for barcode in barcode_info['barcodes']:
                covid_samples.add_sample(barcode, sample_name)

        # Filter away samples without matching metadata
        no_metadata_samples = SampleCollection()
        # NOTE: Iterate over a copy, remove_sample() edits what we loop over
        for barcode, sample_name in covid_samples.sample_info.copy().items():
            try:
                self.startplugin['pluginconfig']['input_metadata'][sample_name]
            except KeyError:
                no_metadata_samples.add_sample(barcode, sample_name)
                covid_samples.remove_sample(barcode)

        # Exit if no valid samples left
        if not covid_samples.sample_info:
            progress_renderer = ProgressRender()
            progress_renderer.add_subheader('Metadata error:')
            progress_renderer.add_line(
                'No samples could be linked to the supplied metadata. Was the correct file chosen?'
            )
            progress_renderer.render()
            return False

        # Parse for plugin outputs
        root_plugin_output_path = os.path.join(root_report_path, 'plugin_out')

        # Pangolin: use the output whose entry in the lookup has the largest
        # value. The path gets its own variable because it is needed again
        # when the raw lineage reports are merged further down.
        # NOTE(review): max() raises ValueError when no output was found.
        pangolin_outputs = find_plugin_outputs(config.pangolin_plugin_name,
                                               root_plugin_output_path)
        pangolin_output_id = max(pangolin_outputs, key=pangolin_outputs.get)
        pangolin_output_path = pangolin_outputs[pangolin_output_id]

        pangolin_csv_path = os.path.join(
            pangolin_output_path,
            '{}.xls'.format(run_name))  # NOTE: It's actually a csv
        pangolin_csv_info = parse_pangolin_csv(pangolin_csv_path)

        # Filter away samples that failed pangolin QC
        # This is done on the torrent specific/modified pangolin output because it contains both qc-fields
        # where as the raw pangolin outputs are version dependant.
        qc_failed_samples = SampleCollection()
        for barcode, pangolin_result in pangolin_csv_info.items():
            if barcode not in covid_samples.barcodes:  # Already filtered away
                continue

            if (pangolin_result['status'] != 'passed_qc'
                    or pangolin_result['passes'] != 'Passed'):
                sample_id = covid_samples.remove_sample(barcode)
                qc_failed_samples.add_sample(barcode, sample_id)

        pangolin_fasta_path = os.path.join(pangolin_output_path,
                                           '{}.fasta'.format(run_name))
        # Read fastas into memory on barcode keys
        sample_fastas = read_fastas(pangolin_fasta_path)

        # Variant caller
        variant_caller_outputs = find_plugin_outputs(
            config.variant_caller_name, root_plugin_output_path)
        variant_caller_output_id = max(variant_caller_outputs,
                                       key=variant_caller_outputs.get)
        variant_caller_output_path = variant_caller_outputs[
            variant_caller_output_id]

        # Read vcf paths into memory on barcode keys
        sample_vcfs = read_vcf_paths(variant_caller_output_path)

        # Dump data at designated location, under result name
        output_path = os.path.join(config.root_dump_path, result_name)
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        transfer_failed_samples = SampleCollection()
        # NOTE: Iterate over a copy here as well, failed samples are removed
        # from covid_samples inside the loop.
        for sample_barcode, sample_name in covid_samples.sample_info.copy(
        ).items():
            try:
                # FASTA
                sample_fasta_sequence = sample_fastas[sample_barcode]
                sample_fasta_output_path = os.path.join(
                    output_path, '{}.fa'.format(sample_name))

                with open(sample_fasta_output_path, 'w') as out:
                    out.write('>{}\n'.format(sample_name))
                    out.write(sample_fasta_sequence)

                # VCF
                sample_vcf_path = sample_vcfs[sample_barcode]
                sample_vcf_name = '{}.vcf.gz'.format(sample_name)
                sample_vcf_output_path = os.path.join(output_path,
                                                      sample_vcf_name)
                # Test the destination path, not the bare file name (which
                # would be resolved against the working directory).
                if not os.path.exists(sample_vcf_output_path):
                    shutil.copyfile(sample_vcf_path, sample_vcf_output_path)

                # FASTQ
                sample_bam_path = self.barcodes[sample_barcode]['bam_filepath']
                sample_fastq_name = '{}.fastq'.format(sample_name)
                sample_fastq_output_path = os.path.join(
                    output_path, sample_fastq_name)

                sample_fastq_gzip_output_path = sample_fastq_output_path + '.gz'
                if not os.path.exists(sample_fastq_gzip_output_path):
                    # An uncompressed fastq left from an earlier run is
                    # reused and only compressed.
                    if not os.path.exists(sample_fastq_output_path):
                        command = blockprocessing.bam2fastq_command(
                            sample_bam_path, sample_fastq_output_path)
                        # NOTE Security issue. Enables shell commands when user defines sample names
                        subprocess.check_call(command, shell=True)
                    # Edits in place
                    subprocess.check_call(['gzip', sample_fastq_output_path])

            except Exception as e:
                # Any failure for one sample must not stop the others
                self.log.error(e)
                transfer_failed_samples.add_sample(sample_barcode, sample_name)
                covid_samples.remove_sample(sample_barcode)

        metadata = self.startplugin['pluginconfig']['input_metadata']
        # Remove failed prior to dump
        # NOTE(review): only QC-failed samples are dropped here; samples
        # that failed the transfer keep their metadata entry - confirm this
        # is intended.
        for sample_name in qc_failed_samples.sample_info.values():
            try:
                metadata.pop(sample_name)
            except KeyError:  # Not all failed may have metadata
                continue

        # Grab all raw sample pangolin outputs
        pangolin_merger = PangolinMerger()
        sample_pangolin_result_root_paths = glob.glob(
            os.path.join(pangolin_output_path, 'IonCode_*'))

        for root_path in sample_pangolin_result_root_paths:
            pangolin_result = os.path.join(root_path, 'lineage_report.csv')
            with open(pangolin_result, 'r') as inp:
                csv_handle = csv.reader(inp, delimiter=',')
                headers = next(csv_handle)
                pangolin_merger.add_headers(headers)
                for row in csv_handle:
                    # The sample name is whatever follows the second
                    # underscore of the taxon column.
                    # NOTE: 2 ensures no split on lab supplied name
                    sample_name = row[0].split('_', 2)[-1]

                    if sample_name not in metadata:
                        continue

                    pangolin_merger.add_row([sample_name] + row[1:])

        # Write collected pangolin results to single file
        pangolin_csv_output_path = os.path.join(output_path, 'pangolin.csv')
        pangolin_merger.write(pangolin_csv_output_path)

        # Dump metadata
        metadata_output_path = os.path.join(output_path, 'metadata.json')
        with open(metadata_output_path, 'w') as out:
            json.dump(metadata, out, indent=4)

        # Setup class for rendering html to user on plugin page
        progress_renderer = ProgressRender()

        progress_renderer.add_subheader('Transferred Samples:')
        for sample_barcode, sample_name in covid_samples.sample_info.items():
            # NOTE(review): str.join() raises TypeError unless this metadata
            # entry is a string - confirm the schema of input_metadata.
            sample_metadata = self.startplugin['pluginconfig'][
                'input_metadata'][sample_name]
            progress_renderer.add_line('\t'.join(
                [sample_barcode, sample_name, sample_metadata]))

        # The rejected collections are all listed the same way
        rejected_sections = (
            ('Transfer Failed Samples:', transfer_failed_samples),
            ('QC Failed Samples:', qc_failed_samples),
            ('No Metadata Samples:', no_metadata_samples),
        )
        for subheader, collection in rejected_sections:
            progress_renderer.add_subheader(subheader)
            for sample_barcode, sample_name in collection.sample_info.items():
                progress_renderer.add_line('\t'.join([sample_barcode,
                                                      sample_name]))

        progress_renderer.render()

        return True