def createFastq(self):
    """Create a FASTQ file for each barcode's BAM and symlink it into downloads.

    For every entry in ``self.json_barcodes``: convert the BAM referenced by
    its ``bam_filepath`` into a FASTQ placed in ``self.pluginDir`` (via the
    external ``bam2fastq`` command), then (re)create a symlink to that FASTQ
    in ``self.downloadDir``. Barcodes whose BAM is missing are skipped.

    :return: None
    """
    # items() avoids re-indexing the dict for every barcode (was keys() + lookup).
    for barcode_name, barcode_info in self.json_barcodes.items():
        bam = barcode_info['bam_filepath']
        if not os.path.exists(bam):
            # Nothing to convert for this barcode.
            continue
        # Final resting place for the FASTQ inside the plugin results dir.
        fastqFileName = self.applyBarcodeName(barcode_name) + '.fastq'
        finalName = os.path.join(self.pluginDir, fastqFileName)
        # Build and run the external conversion command.
        # NOTE(review): shell=True executes a shell string; safe only while
        # bam/finalName never contain shell metacharacters — confirm upstream.
        command = blockprocessing.bam2fastq_command(bam, finalName)
        subprocess.call(command, shell=True)
        # Symlink the FASTQ into the downloads folder, replacing a stale link.
        # lexists() (not exists()) so a dangling old symlink is still removed.
        downloadsName = os.path.join(self.downloadDir, fastqFileName)
        if os.path.exists(finalName):
            if os.path.lexists(downloadsName):
                os.remove(downloadsName)
            os.symlink(finalName, downloadsName)
def createFastq(self):
    """Convert each barcode's BAM to FASTQ and expose it via a download symlink.

    :return: None
    """
    for barcodeName in self.json_barcodes:
        bam_source = self.json_barcodes[barcodeName]['bam_filepath']
        # Only barcodes with an existing BAM are processed.
        if os.path.exists(bam_source):
            # Destination FASTQ inside the plugin results directory.
            fastq_name = self.applyBarcodeName(barcodeName) + '.fastq'
            fastq_target = os.path.join(self.pluginDir, fastq_name)
            # Run the external bam2fastq conversion.
            subprocess.call(
                blockprocessing.bam2fastq_command(bam_source, fastq_target),
                shell=True)
            # Refresh the downloads-folder symlink; lexists() catches a
            # dangling old link so it can be replaced cleanly.
            link_path = os.path.join(self.downloadDir, fastq_name)
            if os.path.exists(fastq_target):
                if os.path.lexists(link_path):
                    os.remove(link_path)
                os.symlink(fastq_target, link_path)
def bam2fastq_picard(self, bam_filename_list, fastq_filename):
    """Convert the first BAM in *bam_filename_list* to *fastq_filename*.

    Best-effort: conversion failures are printed to stderr and swallowed,
    preserving the original contract (always returns None).

    :param bam_filename_list: list of BAM paths; only the first entry is used
    :param fastq_filename: destination FASTQ path
    :return: None
    """
    try:
        com = blockprocessing.bam2fastq_command(bam_filename_list[0],
                                                fastq_filename)
        # NOTE(review): shell=True runs a shell command string — confirm the
        # paths cannot contain shell metacharacters.
        subprocess.call(com, shell=True)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # no longer swallowed; conversion errors stay best-effort as before.
        traceback.print_exc()
def launch(self):
    """Collect per-sample COVID outputs (FASTA/VCF/FASTQ/pangolin/metadata),
    dump them under ``config.root_dump_path``, and render an HTML progress
    report on the plugin page.

    :return: False if no sample could be linked to the supplied metadata,
             True otherwise (individual sample failures are reported, not fatal)
    """
    with open('barcodes.json', 'r') as fh:
        self.barcodes = json.load(fh)  # NOTE: Not available in memory like startplugin

    root_report_path = self.startplugin['runinfo']['report_root_dir']
    run_name = self.startplugin['expmeta']['run_name']
    result_name = self.startplugin['expmeta']['results_name']
    # instrument_name = self.startplugin['expmeta']['instrument']
    # experiment_id = self.startplugin['expmeta']['results_name'].split('_')[-1]

    # Add barcode information and store in collection
    covid_samples = SampleCollection()
    for sample_name, barcode_info in self.startplugin['plan']['barcodedSamples'].items():
        sample_barcodes = barcode_info['barcodes']
        for barcode in sample_barcodes:
            covid_samples.add_sample(barcode, sample_name)

    # Filter away samples without matching metadata
    no_metadata_samples = SampleCollection()
    for barcode, sample_name in covid_samples.sample_info.copy().items(
    ):  # NOTE: Copy as we remove what we iterate through otherwise
        try:
            self.startplugin['pluginconfig']['input_metadata'][sample_name]
        except KeyError:
            no_metadata_samples.add_sample(barcode, sample_name)
            covid_samples.remove_sample(barcode)

    # Exit if no valid samples left
    if not covid_samples.sample_info:
        progress_renderer = ProgressRender()
        progress_renderer.add_subheader('Metadata error:')
        progress_renderer.add_line(
            'No samples could be linked to the supplied metadata. Was the correct file chosen?'
        )
        progress_renderer.render()
        return False

    # Parse for plugin outputs
    root_plugin_output_path = os.path.join(root_report_path, 'plugin_out')

    # Pangolin: pick the most recent plugin run by numeric output id
    plugin_name = config.pangolin_plugin_name
    plugin_outputs = find_plugin_outputs(plugin_name, root_plugin_output_path)
    latest_plugin_output_id = max(plugin_outputs, key=plugin_outputs.get)
    latest_plugin_output_path = plugin_outputs[latest_plugin_output_id]
    pangolin_csv_path = os.path.join(
        latest_plugin_output_path,
        '{}.xls'.format(run_name))  # NOTE: It's actually a csv
    pangolin_csv_info = parse_pangolin_csv(pangolin_csv_path)

    # Filter away samples that failed pangolin QC
    # This is done on the torrent specific/modified pangolin output because it
    # contains both qc-fields, whereas the raw pangolin outputs are version
    # dependent.
    qc_failed_samples = SampleCollection()
    for barcode, pangolin_result in pangolin_csv_info.items():
        if barcode not in covid_samples.barcodes:
            # Already filtered away
            continue
        if pangolin_result['status'] != 'passed_qc' or pangolin_result['passes'] != 'Passed':
            sample_id = covid_samples.remove_sample(barcode)
            qc_failed_samples.add_sample(barcode, sample_id)

    pangolin_fasta_path = os.path.join(latest_plugin_output_path,
                                       '{}.fasta'.format(run_name))
    # Read fastas into memory on barcode keys
    sample_fastas = read_fastas(pangolin_fasta_path)

    # Variant caller: again pick the most recent plugin run
    plugin_name = config.variant_caller_name
    plugin_outputs = find_plugin_outputs(plugin_name, root_plugin_output_path)
    latest_plugin_output_id = max(plugin_outputs, key=plugin_outputs.get)
    latest_plugin_output_path = plugin_outputs[latest_plugin_output_id]
    # Read vcf paths into memory on barcode keys
    sample_vcfs = read_vcf_paths(latest_plugin_output_path)

    # Dump data at designated location, under result name
    output_path = os.path.join(config.root_dump_path, result_name)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    transfer_failed_samples = SampleCollection()
    # FIX: iterate a copy — remove_sample() below mutates sample_info while we
    # iterate (same pattern already used in the metadata filter loop above).
    for sample_barcode, sample_name in covid_samples.sample_info.copy().items():
        try:
            # FASTA: write the consensus sequence under the sample name
            sample_fasta_sequence = sample_fastas[sample_barcode]
            sample_fasta_output_path = os.path.join(
                output_path, '{}.fa'.format(sample_name))
            with open(sample_fasta_output_path, 'w') as out:
                out.write('>{}\n'.format(sample_name))
                out.write(sample_fasta_sequence)

            # VCF: copy the gzipped variant calls if not already present
            sample_vcf_path = sample_vcfs[sample_barcode]
            sample_vcf_name = '{}.vcf.gz'.format(sample_name)
            sample_vcf_output_path = os.path.join(output_path, sample_vcf_name)
            # FIX: existence check was against the bare file name (resolved
            # relative to CWD) instead of the actual destination path.
            if not os.path.exists(sample_vcf_output_path):
                shutil.copyfile(sample_vcf_path, sample_vcf_output_path)

            # FASTQ: convert the barcode BAM, then gzip in place
            sample_bam_path = self.barcodes[sample_barcode]['bam_filepath']
            sample_fastq_name = '{}.fastq'.format(sample_name)
            sample_fastq_output_path = os.path.join(output_path,
                                                    sample_fastq_name)
            sample_fastq_gzip_output_path = sample_fastq_output_path + '.gz'
            if not os.path.exists(sample_fastq_gzip_output_path):
                if not os.path.exists(sample_fastq_output_path):
                    command = blockprocessing.bam2fastq_command(
                        sample_bam_path, sample_fastq_output_path)
                    subprocess.check_call(
                        command, shell=True
                    )  # NOTE Security issue. Enables shell commands when user defines sample names
                subprocess.check_call(['gzip', sample_fastq_output_path
                                       ])  # Edits in place
        except Exception as e:
            # Any failure above marks the sample as transfer-failed; it is
            # dropped from the success set but processing continues.
            self.log.error(e)
            transfer_failed_samples.add_sample(sample_barcode, sample_name)
            covid_samples.remove_sample(sample_barcode)

    metadata = self.startplugin['pluginconfig']['input_metadata']
    # Remove failed prior to dump
    for _, sample_name in qc_failed_samples.sample_info.items():
        try:
            metadata.pop(sample_name)
        except KeyError:
            # Not all failed may have metadata
            continue

    # Repeat of previous pangolin result parsing
    # This is done in hindsight when variable names were reused thus making
    # below necessary
    plugin_name = config.pangolin_plugin_name
    plugin_outputs = find_plugin_outputs(plugin_name, root_plugin_output_path)
    latest_plugin_output_id = max(plugin_outputs, key=plugin_outputs.get)
    latest_plugin_output_path = plugin_outputs[latest_plugin_output_id]

    # Grab all raw sample pangolin outputs and merge them keyed on sample name
    pangolin_merger = PangolinMerger()
    sample_pangolin_result_root_paths = glob.glob(
        os.path.join(latest_plugin_output_path, 'IonCode_*'))
    for root_path in sample_pangolin_result_root_paths:
        pangolin_result = os.path.join(root_path, 'lineage_report.csv')
        with open(pangolin_result, 'r') as inp:
            csv_handle = csv.reader(inp, delimiter=',')
            headers = next(csv_handle)
            pangolin_merger.add_headers(headers)
            for row in csv_handle:
                # Taxon is "<IonCode>_<id>_<lab supplied name>"
                taxon_name = row[0]
                taxon_split = taxon_name.split(
                    '_', 2)  # NOTE: 2 ensures no split on lab supplied name
                sample_name = taxon_split[-1]
                sample_barcode = '_'.join(taxon_split[0:2])
                if sample_name not in metadata:
                    continue
                mod_row = [sample_name] + row[1:]
                pangolin_merger.add_row(mod_row)

    # Write collected pangolin results to single file
    pangolin_csv_output_path = os.path.join(output_path, 'pangolin.csv')
    pangolin_merger.write(pangolin_csv_output_path)

    # Dump metadata
    metadata_output_path = os.path.join(output_path, 'metadata.json')
    with open(metadata_output_path, 'w') as out:
        json.dump(metadata, out, indent=4)

    # Setup class for rendering html to user on plugin page
    progress_renderer = ProgressRender()
    progress_renderer.add_subheader('Transferred Samples:')
    for sample_barcode, sample_name in covid_samples.sample_info.items():
        # Renamed from `metadata` to avoid shadowing the metadata dict above.
        sample_metadata = self.startplugin['pluginconfig']['input_metadata'][
            sample_name]
        progress_renderer.add_line('\t'.join(
            [sample_barcode, sample_name, sample_metadata]))
    progress_renderer.add_subheader('Transfer Failed Samples:')
    for sample_barcode, sample_name in transfer_failed_samples.sample_info.items():
        progress_renderer.add_line('\t'.join([sample_barcode, sample_name]))
    progress_renderer.add_subheader('QC Failed Samples:')
    for sample_barcode, sample_name in qc_failed_samples.sample_info.items():
        progress_renderer.add_line('\t'.join([sample_barcode, sample_name]))
    progress_renderer.add_subheader('No Metadata Samples:')
    for sample_barcode, sample_name in no_metadata_samples.sample_info.items():
        progress_renderer.add_line('\t'.join([sample_barcode, sample_name]))
    progress_renderer.render()
    return True