def gatk_merge_vcfs(self, exe, reference, sample):
    """Combine the per-sample g.VCFs and genotype the merged file.

    When ``exe == 1`` the method changes into ``self.home``, collects every
    file in ``output/GATK/VCFs/`` whose name ends with
    ``-named_variants.g.vcf`` and whose prefix (the text before the first
    ``-``) is contained in ``self.ori_samples``, merges them with GATK
    ``CombineGVCFs`` and then runs GATK ``GenotypeGVCFs`` on the result.
    The outcome of each tool is handed to ``interpret_result`` together
    with the loggers and the messages to use.

    Args:
        exe: Execution flag; the tools are only run when it equals 1.
        reference: Reference passed to both tools via ``-R``.
        sample: Name used to build the two output file names.

    Returns:
        Path of the genotyped VCF, whether or not the tools were run.
    """
    vcf_dir = f"{self.local_output}/GATK/VCFs"
    out = f"{vcf_dir}/{sample}-genotype_variants.vcf.gz"
    out_merged = f"{vcf_dir}/{sample}-merged_variants.g.vcf.gz"
    if exe != 1:
        return out

    # The g.VCF directory below is relative, hence the chdir first.
    os.chdir(self.home)
    gvcf_dir = "output/GATK/VCFs/"
    gvcf_suffix = '-named_variants.g.vcf'

    # Build the repeated "-V <file>" arguments for every g.VCF that belongs
    # to one of the original samples.
    variants = []
    for gvcf in os.listdir(gvcf_dir):
        if gvcf.endswith(gvcf_suffix) and gvcf.split("-")[0] in self.ori_samples:
            variants += ['-V', gvcf_dir + gvcf]

    args = [
        self.gatk, 'CombineGVCFs', *variants, '-R', reference, '-O',
        out_merged
    ]
    result = Cmd['java']['-jar'][args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=result,
        logger_mes='[GATK CombineGVCFs] combining g.VCFs was successful.',
        logger_error_mes=(
            '[GATK CombineGVCFs] For some reason the combining of g.VCFs '
            'was not successful.'),
        runtime_error_mes='GATK CombineGVCFs failed.')

    args = [
        self.gatk, 'GenotypeGVCFs', '-V', out_merged, '-R', reference, '-O',
        out
    ]
    result = Cmd['java']['-jar'][args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=result,
        logger_mes='[GATK GenotypeGVCFs] Genotyping was successful.',
        logger_error_mes=(
            '[GATK GenotypeGVCFs] For some reason genotyping '
            'was not successful.'),
        runtime_error_mes='GATK GenotypeGVCFs failed.')
    return out
def main(self):
    """Build, tag and push the container image if a container name is set.

    With a truthy ``self.containername`` the method stores the registry tag
    ``eu.gcr.io/<project>/<containername>`` in ``self.gcrTag``, builds the
    image from ``self.buildpath`` as ``kube/<containername>``, tags it with
    the registry tag and pushes it through ``gcloud docker``. Each step is
    announced on stdout. Without a container name nothing happens.
    """
    if not self.containername:
        return

    self.gcrTag = "eu.gcr.io/" + self.project + "/" + self.containername
    local_tag = "kube/" + self.containername

    print(info | "Creation of tagged containter image")
    print(info | "Building docker image from path " + self.buildpath)
    docker["build", "-t", local_tag, self.buildpath] & TEE()

    print(info | "Tagging the image")
    docker["tag", local_tag, self.gcrTag] & TEE()

    print(info | "Pushing the image to " + self.gcrTag)
    gcloud["docker", "--project", self.project, "--", "push",
           self.gcrTag] & TEE()
    print(info | "Done pushing image to gcr")
def gatk_variant_selection(self, sample, in1, out, reference, parameters,
                           exe):
    """Run GATK SelectVariants to extract a subset of variants from a VCF.

    Args:
        sample: Sample name, used only in the log/error messages.
        in1: Input VCF (``-V``).
        out: Output file (``-O``).
        reference: Reference (``-R``).
        parameters: Value passed to ``-select-type``.
        exe: Execution flag; the tool is only run when it equals 1.

    Returns:
        ``out``, unchanged.
    """
    if exe != 1:
        return out

    os.chdir(self.home)
    select_args = [
        self.gatk, 'SelectVariants', '-R', reference, '-V', in1, '-O', out,
        '-select-type', parameters
    ]
    outcome = Cmd['java']['-jar'][select_args] & TEE(retcode=None)

    success_msg = (f'[GATK SelectVariants] Sample {sample} Selecting a '
                   f'subset of variants was successful.')
    failure_msg = (f'[GATK SelectVariants] For some reason Selecting a '
                   f'subset of variants of {sample} was not successful.')
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=outcome,
        logger_mes=success_msg,
        logger_error_mes=failure_msg,
        runtime_error_mes=f'GATK SelectVariants failed ({sample}).')
    return out
def run(self, inputs, outputs):
    """Build a HISAT2 index for the reference FASTA and set the outputs.

    The FASTA, its gzipped copy and its ``.fai`` index are copied into the
    current working directory, ``hisat2-build`` writes the index into a new
    ``hisat2_index`` directory, and the output fields are filled with the
    index directory name, the ``<name>.fasta*`` file names and the species
    and build of the input reference.
    """
    reference = inputs.ref_seq.output
    fasta_suffix = ".fasta"

    basename = Path(reference.fasta.path).name
    assert basename.endswith(fasta_suffix)
    name = basename[:-len(fasta_suffix)]

    index_dir = Path("hisat2_index")
    index_dir.mkdir()

    # Bring the reference files next to the index so they can be exported.
    for field in ("fasta", "fastagz", "fai"):
        shutil.copy(Path(getattr(reference, field).path), Path.cwd())

    build_args = [
        reference.fasta.path,
        index_dir / f"{name}_index",
        "-p",
        self.requirements.resources.cores,
    ]
    exit_code, _, _ = Cmd["hisat2-build"][build_args] & TEE(retcode=None)
    if exit_code:
        self.error("Error occurred while preparing the HISAT2 index.")

    outputs.index = index_dir.name
    outputs.fasta = f"{name}.fasta"
    outputs.fastagz = f"{name}.fasta.gz"
    outputs.fai = f"{name}.fasta.fai"
    outputs.species = reference.species
    outputs.build = reference.build
def run(self, inputs, outputs):
    """Run ``alleyoop snpeval`` on a slamdunk BAM and register the results.

    The slamdunk variants file is symlinked as ``<name>_snp.vcf`` in the
    working directory (passed to the tool via ``-s .``), the tool writes
    into ``snpeval/``, and its CSV report is renamed to ``.txt`` before
    the outputs are set.
    """
    slamdunk = inputs.slamdunk
    bam_name = os.path.basename(slamdunk.bam.path)
    assert bam_name.endswith(".bam")
    name = bam_name[:-len(".bam")]

    out_dir = "snpeval"
    snpeval_args = [
        "-o", out_dir,
        "-r", inputs.ref_seq.fasta.path,
        "-b", inputs.regions.bed.path,
        "-s", ".",
        "-l", inputs.read_length,
    ]

    link_variants = Cmd["ln"]["-s", slamdunk.variants.path, f"{name}_snp.vcf"]
    link_variants()

    exit_code, _, _ = Cmd["alleyoop"]["snpeval"][snpeval_args][
        slamdunk.bam.path] & TEE(retcode=None)
    if exit_code:
        self.error("Alleyoop snpeval analysis failed.")

    csv_report = os.path.join(out_dir, f"{name}_SNPeval.csv")
    txt_report = os.path.join(out_dir, f"{name}_SNPeval.txt")
    os.rename(csv_report, txt_report)

    outputs.report = txt_report
    outputs.plot = os.path.join(out_dir, f"{name}_SNPeval.pdf")
    outputs.species = slamdunk.species
    outputs.build = slamdunk.build
def picard_rg(self, sample, in1, ref_organism, exe):
    """Run Picard AddOrReplaceReadGroups on a BAM file.

    Args:
        sample: Sample name; used for ``RGSM`` and the output file name.
        in1: Input file (``INPUT=``).
        ref_organism: Value written to ``RGLB``.
        exe: Execution flag; Picard is only run when it equals 1.

    Returns:
        Path of the output BAM, ``<local_output>/GATK/<sample>.RG.bam``.
    """
    out = f'{self.local_output}/GATK/{sample}.RG.bam'
    if exe != 1:
        return out

    os.chdir(self.home)
    picard_args = [
        self.picard,
        'AddOrReplaceReadGroups',
        f'INPUT={in1}',
        f'OUTPUT={out}',
        f'RGSM={sample}',
        'RGPU=none',
        f'RGLB={ref_organism}',
        'RGPL=ILLUMINA',
    ]
    outcome = Cmd['java']['-jar'][picard_args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=outcome,
        logger_mes=f'[RG] Sample {sample} RG was successfully.',
        logger_error_mes=(
            f'[RG] For some reason the RG of {sample} was not successful.'),
        runtime_error_mes=f'RG failed ({sample}).')
    return out
def bbduk_single(self, sample, r1_name, r1, exe, n_cores, ktrim, qtrim,
                 trimq, k, mink, hdist, ftm, chastityfilter, minlen,
                 adapters):
    """Trim single-end reads with ``bbduk.sh``.

    Every trimming parameter is forwarded to BBDuk as ``key=value``;
    ``adapters`` is a file name inside ``self.local_adapters`` and
    ``n_cores`` becomes ``threads=``. ``sample`` only appears in the
    messages and ``r1_name`` in the output file name.

    Returns:
        Path of the trimmed FASTQ,
        ``<local_output>/trimmed/BBduk_<r1_name>.fastq``; BBDuk is only
        run when ``exe == 1``.
    """
    trimmed = f'{self.local_output}/trimmed/BBduk_{r1_name}.fastq'
    if exe != 1:
        return trimmed

    os.chdir(self.home)
    bbduk_args = [
        f'in={r1}',
        f'out={trimmed}',
        f'ref={self.local_adapters}/{adapters}',
        f'ktrim={ktrim}',
        f'qtrim={qtrim}',
        f'trimq={trimq}',
        'overwrite=true',
        f'k={k}',
        f'mink={mink}',
        f'hdist={hdist}',
        'tpe',
        'tbo',
        f'ftm={ftm}',
        f'chastityfilter={chastityfilter}',
        f'minlen={minlen}',
        f'threads={n_cores}',
    ]
    outcome = Cmd['bbduk.sh'][bbduk_args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=outcome,
        logger_mes=f'[BBduk] Sample {sample} trimmed successfully.',
        logger_error_mes=(
            f'[BBduk] For some reason the trimming of {sample} '
            f'was not successful.'),
        runtime_error_mes=f'BBduk failed ({sample}).')
    return trimmed
def bwa(self, sample, reference, in1, in2, exe, core):
    """Align reads with ``bwa mem`` and write a sorted BAM.

    The alignment is piped through ``samtools fixmate -m`` into
    ``samtools sort``, which writes the output file.

    Args:
        sample: Sample name; used for the output name and the messages.
        reference: Reference passed to ``bwa mem``.
        in1: First reads file.
        in2: Second reads file.
        exe: Execution flag; the pipeline is only run when it equals 1.
        core: Thread count passed to ``bwa mem -t``.

    Returns:
        Path of the BAM, ``<local_output>/aligned/<sample>.bam``.
    """
    # Computed once; the original recomputed the identical string inside
    # the ``exe`` branch.
    out_bam = f'{self.local_output}/aligned/{sample}.bam'
    if exe != 1:
        return out_bam

    os.chdir(self.home)
    bwa = Cmd['bwa']
    samtools = Cmd['samtools']
    args = ["mem", '-t', core, '-M', reference, in1, in2]

    # bwa mem | samtools fixmate | samtools sort -o <out_bam>
    result = (bwa[args]
              | samtools['fixmate', '-m', '-', '-']
              | samtools['sort', '-o', out_bam]) & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=result,
        logger_mes=f'[BWA] Sample {sample} aligned successfully.',
        logger_error_mes=(
            f'[BWA] For some reason the alignment of {sample} '
            f'was not successful.'),
        runtime_error_mes=f'BWA failed ({sample}).')
    return out_bam
def run(self, inputs, outputs):
    """Create a Cell Ranger reference with ``cellranger mkref``.

    Genome and annotation must agree on build and species; a mismatch is
    reported through ``self.error``. The directory produced by the tool
    (named after the genome build) is renamed to ``cellranger_index``.
    """
    genome = inputs.genome
    annotation = inputs.annotation

    genome_build = genome.build
    annotation_build = annotation.build
    if genome_build != annotation_build:
        self.error(
            f"Builds of the genome {genome_build} and annotation "
            f"{annotation_build} do not match. Please provide genome "
            "and annotation with the same build.")

    genome_species = genome.species
    annotation_species = annotation.species
    if genome_species != annotation_species:
        self.error(
            f"Species of genome {genome_species} and annotation "
            f"{annotation_species} do not match. Please provide genome "
            "and annotation with the same species.")

    resources = self.requirements.resources
    # 90% of the configured memory, divided by 1024 and truncated
    # (presumably MB -> GB; confirm against the resource definition).
    mem_gb = int(resources.memory * 0.9 / 1024)
    options = [
        f"--genome={genome_build}",
        f"--genes={annotation.annot_sorted.path}",
        f"--fasta={genome.fasta.path}",
        f"--nthreads={resources.cores}",
        f"--memgb={mem_gb}",
    ]

    cmd = Cmd["cellranger"]["mkref"]
    for option in options:
        cmd = cmd[option]

    exit_code, _, _ = cmd & TEE(retcode=None)
    if exit_code:
        self.error("Error while running cellranger mkref.")

    index_name = "cellranger_index"
    os.rename(genome_build, index_name)

    outputs.genome_index = index_name
    outputs.source = annotation.source
    outputs.species = genome_species
    outputs.build = genome_build
def run(self, inputs, outputs):
    """Run ``alleyoop rates`` on a slamdunk BAM and register the results.

    The tool writes into ``rates/``; its ``_overallrates.csv`` report is
    renamed to ``.txt`` before the outputs are set.
    """
    slamdunk = inputs.slamdunk
    bam_name = os.path.basename(slamdunk.bam.path)
    assert bam_name.endswith(".bam")
    name = bam_name[:-len(".bam")]

    out_dir = "rates"
    rates_args = ["-o", out_dir, "-r", inputs.ref_seq.fasta.path]

    exit_code, _, _ = Cmd["alleyoop"]["rates"][rates_args][
        slamdunk.bam.path] & TEE(retcode=None)
    if exit_code:
        self.error("Alleyoop rates analysis failed.")

    csv_report = os.path.join(out_dir, f"{name}_overallrates.csv")
    txt_report = os.path.join(out_dir, f"{name}_overallrates.txt")
    os.rename(csv_report, txt_report)

    outputs.report = txt_report
    outputs.plot = os.path.join(out_dir, f"{name}_overallrates.pdf")
    outputs.species = slamdunk.species
    outputs.build = slamdunk.build
def run(self, inputs, outputs):
    """Run the MethylationArraySesame process.

    The red- and green-channel IDAT files are copied into ``./data``
    before ``sesame.R`` is called with the platform and the matching
    ``<platform>.hg38.manifest``. The script is expected to produce
    ``QC_data.txt`` and ``beta_values_annotated.txt.gz`` in the working
    directory; these are registered as outputs.
    """
    dirdata = Path("./data")
    # exist_ok avoids the check-then-create race of exists()/mkdir(); it
    # raises FileExistsError if ``data`` exists but is not a directory.
    dirdata.mkdir(exist_ok=True)

    red = inputs.idat_file.output.red_channel.path
    green = inputs.idat_file.output.green_channel.path
    # Plain loop: the copies are side effects, not values to collect.
    for channel_file in (red, green):
        copy2(src=channel_file, dst=dirdata.name)

    platform = inputs.idat_file.output.platform
    manifest = f"{platform}.hg38.manifest"

    sesame_args = [
        f"--platform={platform}",
        f"--manifest={manifest}",
    ]
    rc, _, _ = Cmd["sesame.R"][sesame_args] & TEE(retcode=None)
    # Returns QC_data.txt and beta_values_annotated.txt.gz
    if rc:
        self.error(
            "An error was encountered during the running of SeSAMe pipeline."
        )

    outputs.qc_data = "QC_data.txt"
    outputs.methylation_data = "beta_values_annotated.txt.gz"
    outputs.species = inputs.idat_file.output.species
    outputs.platform = platform
def run(self, inputs, outputs):
    """Call variants with GATK HaplotypeCaller in GVCF mode.

    The uncompressed ``<bam stem>.g.vcf`` produced by the tool is then
    compressed with ``bgzip`` and indexed with ``tabix``; the compressed
    file and its ``.tbi`` index are registered as outputs.
    """
    bam = inputs.bam.output
    gvcf = Path(bam.bam.path).stem + ".g.vcf"
    gvcf_gz = gvcf + ".gz"
    gvcf_tbi = gvcf_gz + ".tbi"

    caller_args = [
        "-R", inputs.ref_seq.output.fasta.path,
        "-I", bam.bam.path,
        "-O", gvcf,
        "-contamination", inputs.options.contamination,
    ]
    for annotation_group in ("StandardAnnotation", "StandardHCAnnotation",
                             "AS_StandardAnnotation"):
        caller_args += ["-G", annotation_group]
    # GQ bands 10, 20, ..., 90.
    for gq_band in range(10, 100, 10):
        caller_args += ["-GQB", gq_band]
    caller_args += ["-ERC", "GVCF"]

    if inputs.options.intervals:
        caller_args += ["-L", inputs.options.intervals.output.bed.path]

    exit_code, _, _ = Cmd["gatk"]["HaplotypeCaller"][caller_args] & TEE(
        retcode=None)
    if exit_code:
        self.error("GATK HaplotypeCaller tool failed.")

    # Compress and index the output variants file
    compress = Cmd["bgzip"]["-c", gvcf] > gvcf_gz
    compress()
    Cmd["tabix"]["-p", "vcf", gvcf_gz]()

    outputs.vcf = gvcf_gz
    outputs.tbi = gvcf_tbi
    outputs.species = bam.species
    outputs.build = bam.build
def picard_md(self, sample, in1, exe):
    """Run Picard MarkDuplicates on a BAM file.

    Args:
        sample: Sample name; used for the output names and the messages.
        in1: Input file (``INPUT=``).
        exe: Execution flag; Picard is only run when it equals 1.

    Returns:
        Path of the output BAM,
        ``<local_output>/GATK/<sample>.Sorted.dedup.bam``.
    """
    out = f'{self.local_output}/GATK/{sample}.Sorted.dedup.bam'
    if exe != 1:
        return out

    os.chdir(self.home)
    metrics_path = (
        f'{self.local_output}/GATK/{sample}.sorted.dedup.metrics.txt')
    picard_args = [
        self.picard,
        'MarkDuplicates',
        f'INPUT={in1}',
        f'OUTPUT={out}',
        f'METRICS_FILE={metrics_path}',
    ]
    outcome = Cmd['java']['-jar'][picard_args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=outcome,
        logger_mes=(
            f'[MD] Sample {sample} Marking duplicates was successfully.'),
        logger_error_mes=(
            f'[MD] For some reason the Marking duplicates of {sample} '
            f'was not successful.'),
        runtime_error_mes=f'Marking duplicates failed ({sample}).')
    return out
def run(command, retcode=0):
    """Execute a plumbum command through the ``TEE`` modifier.

    Args:
        command: The plumbum command to execute.
        retcode: Forwarded unchanged as ``TEE(retcode=...)``; defaults to 0.

    Returns:
        Whatever ``command & TEE(retcode=retcode)`` evaluates to.
    """
    tee_modifier = TEE(retcode=retcode)
    return command & tee_modifier
def run(self, inputs, outputs):
    """Build a BWA index for the reference FASTA and set the outputs.

    The FASTA, its gzipped copy and its ``.fai`` index are copied into the
    current working directory, ``bwa index`` writes the index into a new
    ``BWA_index`` directory under the prefix ``<name>.fasta``, and the
    output fields are filled with the index directory name, the
    ``<name>.fasta*`` file names and the species and build of the input.
    """
    reference = inputs.ref_seq
    fasta_suffix = ".fasta"

    basename = Path(reference.fasta.path).name
    assert basename.endswith(fasta_suffix)
    name = basename[:-len(fasta_suffix)]

    index_dir = Path("BWA_index")
    index_dir.mkdir()

    # Bring the reference files next to the index so they can be exported.
    for field in ("fasta", "fastagz", "fai"):
        shutil.copy(Path(getattr(reference, field).path), Path.cwd())

    index_args = [
        "-p",
        index_dir / f"{name}.fasta",
        reference.fasta.path,
    ]
    exit_code, _, _ = Cmd["bwa"]["index"][index_args] & TEE(retcode=None)
    if exit_code:
        self.error("Error occurred while preparing the BWA index.")

    outputs.index = index_dir.name
    outputs.fasta = f"{name}.fasta"
    outputs.fastagz = f"{name}.fasta.gz"
    outputs.fai = f"{name}.fasta.fai"
    outputs.species = reference.species
    outputs.build = reference.build
def gatk_BaseRecalibrator(self, sample, bam, ref_fasta, exe, ref_vcf):
    """Recalibrate base qualities: GATK BaseRecalibrator, then ApplyBQSR.

    Args:
        sample: Sample name; used for the file names and the messages.
        bam: Input BAM (``-I``) for both tools.
        ref_fasta: Reference (``-R``) for both tools.
        exe: Execution flag; the tools are only run when it equals 1.
        ref_vcf: Known-sites file for BaseRecalibrator.

    Returns:
        Name of the recalibrated BAM, ``<sample>.bqsrCal.bam`` (a relative
        name; the tools run after changing into ``self.home``).
    """
    recal_table = f"{sample}_recal_data.table"
    out = f"{sample}.bqsrCal.bam"
    if exe != 1:
        return out

    os.chdir(self.home)
    java_jar = Cmd['java']['-jar']

    # Step 1: build the recalibration table.
    model_args = [
        self.gatk, 'BaseRecalibrator', '-R', ref_fasta, '-I', bam,
        '--use-original-qualities', '-known-sites', ref_vcf, '-O',
        recal_table
    ]
    model_outcome = java_jar[model_args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=model_outcome,
        logger_mes=(
            f'[GATK] Sample {sample} Base model calibration '
            f'was successful.'),
        logger_error_mes=(
            f'[GATK] For some reason base model calibration for {sample} '
            f'was not successful.'),
        runtime_error_mes=f'Base model calibration failed ({sample}).')

    # Step 2: apply the table to produce the recalibrated BAM.
    apply_args = [
        self.gatk, 'ApplyBQSR', '--add-output-sam-program-record', '-R',
        ref_fasta, '-I', bam, '--use-original-qualities',
        '--bqsr-recal-file', recal_table, '-O', out
    ]
    apply_outcome = java_jar[apply_args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=apply_outcome,
        logger_mes=(
            f'[GATK] Sample {sample} quality score correction '
            f'was successful.'),
        logger_error_mes=(
            f'[GATK] For some quality score correction of {sample} '
            f'was not successful.'),
        runtime_error_mes=f'Quality score correction failed ({sample}).')
    return out
def kube_refresh(self, files):
    """Force-replace the given Kubernetes files with ``kubectl replace``.

    Each entry of ``files`` is appended verbatim to the current working
    directory (plain string concatenation, so the entry must supply its
    own leading separator) and passed to
    ``kubectl replace --force -f <path> --validate=false``.

    Note: this is not currently working reliably. Run the delete and
    deploy commands instead to refresh Kubernetes.
    """
    for manifest in files:
        manifest_path = os.getcwd() + manifest
        print(info | "Deploying file" + manifest_path)
        replace_cmd = kubectl["replace", "--force", "-f", manifest_path,
                              "--validate=false"]
        replace_cmd & TEE()
def fg(a, *cmds):
    """Run local command ``a`` with arguments ``cmds`` through ``TEE``.

    A non-zero exit code does not raise; it is reported on stdout instead.

    Returns:
        True if the exit code was 0, otherwise False.
    """
    exit_code = (local[a][cmds] & TEE(retcode=None))[0]
    if exit_code == 0:
        return True
    print(f"Failed to execute in foreground, error code: {exit_code}")
    return False
# File-name endings recognised as FASTQ extensions, longest first so that
# ".fastq.gz" is matched before ".fastq".
_FASTQ_SUFFIXES = ('.fastq.gz', '.fq.gz', '.fastq', '.fq')


def _strip_fastq_suffix(filename):
    """Return *filename* without its trailing FASTQ extension, if any."""
    for suffix in _FASTQ_SUFFIXES:
        if filename.endswith(suffix):
            return filename[:-len(suffix)]
    return filename


def run(self, inputs, outputs):
    """Trim reads with three chained Cutadapt passes and run FastQC.

    All lanes of the input are concatenated into ``input_reads.fastq.gz``
    and piped through three Cutadapt invocations; the last one writes
    ``<name>_trimmed.fastq.gz`` and the combined report goes to
    ``cutadapt_report.txt``. ``fastqc.sh`` is then run on the trimmed
    reads.

    ``<name>`` is the file name of the first lane without its FASTQ
    extension. The previous ``str.strip('.fastq.gz')`` removed any of the
    characters ``.fastqgz`` from both ends and therefore corrupted names
    such as ``reads_a.fastq.gz`` (-> ``reads_``).
    """
    # Get input reads file name (for the first of the possible multiple lanes)
    name = _strip_fastq_suffix(os.path.basename(inputs.reads.fastq[0].path))
    trimmed_reads = '{}_trimmed.fastq.gz'.format(name)

    # Concatenate multi-lane read files
    (Cmd['cat'][[reads.path for reads in inputs.reads.fastq]]
     > 'input_reads.fastq.gz')()

    # An explicit quality cutoff takes precedence over NextSeq trimming.
    if inputs.options.quality_cutoff is not None:
        read_trim_cutoff = '--quality-cutoff={}'.format(
            inputs.options.quality_cutoff)
    else:
        read_trim_cutoff = '--nextseq-trim={}'.format(
            inputs.options.nextseq_trim)

    first_pass_input = [
        '-m', inputs.options.min_len,
        '-O', inputs.options.min_overlap,
        '-n', inputs.options.times,
        '-a', 'polyA=A{20}',
        '-a', 'QUALITY=G{20}',
        '-j', self.requirements.resources.cores,
        'input_reads.fastq.gz',
    ]

    second_pass_input = [
        '-m', inputs.options.min_len,
        read_trim_cutoff,
        '-a', 'truseq=A{18}AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC',
        '-j', self.requirements.resources.cores,
        '-',
    ]

    third_pass_input = [
        '-m', inputs.options.min_len,
        '-O', inputs.options.min_overlap,
        '-g', 'truseq=A{18}AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC',
        '--discard-trimmed',
        '-j', self.requirements.resources.cores,
        '-o', trimmed_reads,
        '-',
    ]

    # Run Cutadapt, write analysis reports into a report file
    (
        Cmd['cutadapt'][first_pass_input]
        | Cmd['cutadapt'][second_pass_input]
        | Cmd['cutadapt'][third_pass_input] > 'cutadapt_report.txt'
    )()

    # Prepare final FASTQC report
    fastqc_args = [
        trimmed_reads, 'fastqc', 'fastqc_archive', 'fastqc_url', '--nogroup'
    ]
    return_code, _, _ = Cmd['fastqc.sh'][fastqc_args] & TEE(retcode=None)
    if return_code:
        self.error("Error while preparing FASTQC report.")

    # Save the outputs
    outputs.fastq = [trimmed_reads]
    outputs.report = 'cutadapt_report.txt'
def run(self, inputs, outputs):
    """Derive expression files from a tcount table and register them.

    ``prepare_expressions`` is asked to write the two expression files;
    the second one is stored as JSON with ``expression2storage.py`` and
    both are passed to ``create_expression_set.py``.
    """
    tcount = inputs.tcount
    tcount_path = tcount.tcount.path

    basename = os.path.basename(tcount_path)
    assert basename.endswith(".txt")
    name = basename[:-len(".txt")]

    rc_file = name + "_rc.txt.gz"
    tpm_file = name + "_tmp.txt.gz"
    set_name = name + "_expressions"
    json_file = "json.txt"

    prepare_expressions(tcount_path, rc_file, tpm_file)

    # Both files must exist before they are handed to the tools below.
    for expected in (rc_file, tpm_file):
        if not os.path.isfile(expected):
            self.error(
                "Failed to parse tcout file. {} file was not created".format(
                    expected
                )
            )

    # Save the abundance estimates to JSON storage
    Cmd["expression2storage.py"]("--output", json_file, tpm_file)

    # Prepare expression set file with feature_id -> gene_id mappings
    exp_set_args = [
        "--expressions", rc_file,
        "--source_db", inputs.source,
        "--species", tcount.species,
        "--output_name", set_name,
        "--norm_expressions", tpm_file,
        "--norm_expressions_type", "TPM",
    ]
    exit_code, _, _ = Cmd["create_expression_set.py"][exp_set_args] & TEE(
        retcode=None
    )
    if exit_code:
        self.error("Error while preparing the expression set file.")

    outputs.exp = tpm_file
    outputs.exp_json = json_file
    outputs.exp_type = "TPM"
    outputs.rc = rc_file
    outputs.exp_set = set_name + ".txt.gz"
    outputs.exp_set_json = set_name + ".json"
    outputs.species = tcount.species
    outputs.build = tcount.build
    outputs.source = inputs.source
    outputs.feature_type = "gene"
def test(filename, *args):
    """Run the executable ``LOCAL_DIR/<filename>/<filename>`` and time it.

    The program is started with ``--nosplash`` followed by ``args`` from
    inside its own directory. A non-zero exit code is reported, not raised.

    Returns:
        dict with ``name`` (file name plus arguments), ``code``, ``time``
        (the timer's interval), ``stdout`` and ``stderr``.
    """
    workdir = LOCAL_DIR / filename
    command = local[workdir / filename]
    command = command['--nosplash']
    for extra in args:
        command = command[extra]

    colors.info.print('Running', command)
    with local.cwd(workdir):
        with Timer() as t:
            code, stdout, stderr = command & TEE(retcode=None)

    if code == 0:
        colors.success.print(filename, 'Successful')
    else:
        colors.fatal.print(filename, 'Failed with status code:', code)

    label = filename + ' ' + ' '.join(str(extra) for extra in args)
    return dict(name=label,
                code=code,
                time=t.interval,
                stdout=stdout,
                stderr=stderr)
def fastqc_single(self, status, sample, in1):
    """Run FastQC on one input file.

    The command is started from ``self.local_input`` and writes its report
    to ``<local_output>/fastqc/<status>/``.

    Args:
        status: Sub-directory of the FastQC output directory.
        sample: Sample name, used only in the messages.
        in1: File handed to ``fastqc``.
    """
    os.chdir(self.local_input)
    report_dir = f'{self.local_output}/fastqc/{status}/'
    outcome = Cmd['fastqc'][in1, '-o', report_dir] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=outcome,
        logger_mes=f'[Fastqc] Sample {sample} analysed successfully.',
        logger_error_mes=(
            f'[FastQC] For some reason the analysis of {sample} '
            f'was not successful.'),
        runtime_error_mes=f'FastQC failed ({sample}).')
def run(self, inputs, outputs):
    """Collect RRBS metrics with GATK CollectRrbsMetrics.

    The three files written by the tool under the ``<name>.`` prefix are
    renamed to ``<name>_rrbs_*`` names and registered as outputs.
    ``--MAX_MISMATCH_RATE`` is only passed when the value lies in [0, 1].
    """
    bam = inputs.bam
    bam_name = os.path.basename(bam.bam.path)
    assert bam_name.endswith(".bam")
    name = bam_name[:-len(".bam")]

    metrics_args = [
        "--INPUT", bam.bam.path,
        "--REFERENCE", inputs.genome.fasta.path,
        "--METRICS_FILE_PREFIX", name,
        "--C_QUALITY_THRESHOLD", inputs.min_quality,
        "--NEXT_BASE_QUALITY_THRESHOLD", inputs.next_base_quality,
        "--MINIMUM_READ_LENGTH", inputs.min_lenght,
        "--VALIDATION_STRINGENCY", inputs.validation_stringency,
        "--ASSUME_SORTED", inputs.assume_sorted,
    ]
    if 0 <= inputs.mismatch_rate <= 1:
        metrics_args += ["--MAX_MISMATCH_RATE", inputs.mismatch_rate]

    exit_code, _, _ = Cmd["gatk"]["CollectRrbsMetrics"][metrics_args] & TEE(
        retcode=None)
    if exit_code:
        self.error("CollectRrbsMetrics tool failed.")

    # (name written by the tool, name exported as output)
    report_file = f"{name}_rrbs_summary_metrics.txt"
    detailed_file = f"{name}_rrbs_detail_metrics.txt"
    out_plot = f"{name}_rrbs_qc.pdf"
    renames = (
        (f"{name}.rrbs_summary_metrics", report_file),
        (f"{name}.rrbs_detail_metrics", detailed_file),
        (f"{name}.rrbs_qc.pdf", out_plot),
    )
    for produced, exported in renames:
        os.rename(produced, exported)

    outputs.report = report_file
    outputs.detailed_report = detailed_file
    outputs.plot = out_plot
    outputs.species = bam.species
    outputs.build = bam.build
def gatk_variants_to_table(self, in1, sample, exe):
    """Convert a VCF into a table with GATK VariantsToTable.

    The table holds the fields CHROM, POS, REF and ALT and the genotype
    fields DP, AD, GQ and PL.

    Args:
        in1: Input VCF (``-V``).
        sample: Sample name; used for the output name and the messages.
        exe: Execution flag; the tool is only run when it equals 1.

    Returns:
        Path of the table,
        ``<local_output>/GATK/tables/<sample>-raw.snps.table``.
    """
    out = f"{self.local_output}/GATK/tables/{sample}-raw.snps.table"
    if exe != 1:
        return out

    os.chdir(self.home)
    table_args = [self.gatk, 'VariantsToTable', '-V', in1]
    for site_field in ('CHROM', 'POS', 'REF', 'ALT'):
        table_args += ['-F', site_field]
    for genotype_field in ('DP', 'AD', 'GQ', 'PL'):
        table_args += ['-GF', genotype_field]
    table_args += ['-O', out]

    outcome = Cmd['java']['-jar'][table_args] & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=outcome,
        logger_mes=(
            f'[GATK VariantsToTable] Sample {sample} table creation '
            f'was successful.'),
        logger_error_mes=(
            f'[GATK VariantsToTable] For some reason the table creation '
            f'of {sample} was not successful.'),
        runtime_error_mes=f'GATK VariantsToTable failed ({sample}).')
    return out
def run(self, inputs, outputs):
    """Collect insert-size metrics with GATK CollectInsertSizeMetrics.

    A minimum fraction outside [0, 0.5] triggers a warning and is replaced
    by 0. The metrics text file and the histogram PDF are registered as
    outputs.
    """
    bam = inputs.bam
    bam_name = os.path.basename(bam.bam.path)
    assert bam_name.endswith(".bam")
    name = bam_name[:-len(".bam")]

    metrics_file = f"{name}_insert_size_metrics.txt"
    histogram_file = f"{name}_insert_size.pdf"

    metrics_args = [
        "--INPUT", bam.bam.path,
        "--OUTPUT", metrics_file,
        "--Histogram_FILE", histogram_file,
        "--REFERENCE_SEQUENCE", inputs.genome.fasta.path,
        "--DEVIATIONS", inputs.deviations,
        "--INCLUDE_DUPLICATES", inputs.include_duplicates,
        "--VALIDATION_STRINGENCY", inputs.validation_stringency,
        "--ASSUME_SORTED", inputs.assume_sorted,
    ]

    if 0 <= inputs.minimum_fraction <= 0.5:
        minimum_pct = inputs.minimum_fraction
    else:
        self.warning(
            "Minimum fraction of reads should be between 0 and 0.5. "
            "Setting minimum fraction of reads to 0."
        )
        minimum_pct = 0
    metrics_args += ["--MINIMUM_PCT", minimum_pct]

    exit_code, _, _ = Cmd["gatk"]["CollectInsertSizeMetrics"][
        metrics_args] & TEE(retcode=None)
    if exit_code:
        self.error("CollectInsertSizeMetrics tool failed.")

    outputs.report = metrics_file
    outputs.plot = histogram_file
    outputs.species = bam.species
    outputs.build = bam.build
def run(self, inputs, outputs):
    """Import a mapped expression file and build its expression set.

    The file is imported in compressed form; once the compression suffix
    is removed its name must end in ``.tab``, ``.tsv`` or ``.txt``. The
    expressions are converted to JSON and passed to
    ``create_expression_set.py``; most metadata is taken over from the
    unmapped expression object.
    """
    exp = inputs.exp.import_file(imported_format="compressed")
    exp_stem = Path(exp).stem

    supported_extensions = (".tab", ".tsv", ".txt")
    if not exp_stem.endswith(supported_extensions):
        self.error(
            "The imported file has unsupported file name extension. "
            f"The supported extensions are {supported_extensions}.")
    # All supported extensions are four characters long.
    name = exp_stem[:-4]
    set_name = name + "_expressions"
    json_file = "json.txt"

    expression_to_json(exp, json_file)

    unmapped = inputs.exp_unmapped.output
    exp_set_args = [
        "--expressions", exp,
        "--source_db", inputs.source,
        "--species", unmapped.species,
        "--output_name", set_name,
        "--expressions_type", unmapped.exp_type,
    ]
    exit_code, _, _ = Cmd["create_expression_set.py"][
        exp_set_args] & TEE(retcode=None)
    if exit_code:
        self.error("Error while preparing the expression set file.")

    # The platform id is optional and only forwarded when present.
    platform_id = unmapped.platform_id
    if platform_id:
        outputs.platform_id = platform_id

    outputs.exp = exp
    outputs.exp_json = json_file
    outputs.exp_type = unmapped.exp_type
    outputs.platform = unmapped.platform
    outputs.exp_set = set_name + ".txt.gz"
    outputs.exp_set_json = set_name + ".json"
    outputs.source = inputs.source
    outputs.species = unmapped.species
    outputs.build = inputs.build
    outputs.feature_type = "gene"
    outputs.probe_mapping = inputs.probe_mapping
def run(self, inputs, outputs):
    """Estimate the bisulfite conversion rate of spike-in reads with bsrate.

    The analysis is skipped when ``inputs.skip`` is set, when the sample
    has no spike-in alignment file, or when no control sequence was given.
    In that case the report file only contains a note that the process
    was skipped.
    """
    mr_name = os.path.basename(inputs.mr.mr.path)
    assert mr_name.endswith(".mr.gz")
    name = mr_name[:-len(".mr.gz")]
    report_file = f"{name}_spikein_bsrate.txt"

    skip_process = inputs.skip

    # Probe the optional inputs; a missing one raises AttributeError.
    try:
        inputs.mr.spikein_mr.path
    except AttributeError:
        self.warning(
            "Selected sample lacks the alignment file for unmethylated control reads."
        )
        skip_process = True

    try:
        inputs.sequence.fasta.path
    except AttributeError:
        self.warning("Unmethylated control sequence was not provided.")
        skip_process = True

    if skip_process:
        with open(report_file, "w") as handle:
            handle.write("Bisulfite conversion rate process skipped.")
    else:
        unpacked_mr = f"{name}.mr"
        decompress = Cmd["pigz"]["-cd", inputs.mr.spikein_mr.path] > unpacked_mr
        decompress()

        bsrate_args = [
            "-chrom", inputs.sequence.fasta.path,
            "-output", report_file,
        ]
        if inputs.count_all:
            bsrate_args.append("-all")
        if inputs.max_mismatch:
            bsrate_args += ["-max", inputs.max_mismatch]
        if inputs.a_rich:
            bsrate_args.append("-a-rich")

        exit_code, _, _ = Cmd["bsrate"][bsrate_args][unpacked_mr] & TEE(
            retcode=None)
        if exit_code:
            self.error("Bsrate analysis failed.")

    outputs.report = report_file
def samtools_index(self, sample, in1, exe):
    """Index a file with ``samtools index``.

    Args:
        sample: Sample name, used only in the messages.
        in1: File handed to ``samtools index``.
        exe: Execution flag; samtools is only run when it equals 1.
    """
    if exe != 1:
        return

    os.chdir(self.home)
    index_cmd = Cmd['samtools']['index', in1]
    outcome = index_cmd & TEE(retcode=None)
    interpret_result(
        log=self.logger,
        so_logger=self.st_logger,
        report=outcome,
        logger_mes=(
            f'[Samtools index] Sample {sample} indexing successfully.'),
        logger_error_mes=(
            f'[Samtools index] For some reasonthe indexing of {sample} '
            f'was not successful.'),
        runtime_error_mes=f'Samtools index failed ({sample}).')
def run(self, inputs, outputs):
    """Collect whole-genome metrics with GATK CollectWgsMetrics.

    The metrics file is post-processed by ``replace_metrics_class`` before
    it is registered as the report output.
    """
    bam = inputs.bam.output
    options = inputs.options

    bam_name = os.path.basename(bam.bam.path)
    assert bam_name.endswith(".bam")
    name = bam_name[:-len(".bam")]
    metrics_file = f"{name}_wgs_metrics.txt"

    metrics_args = [
        "--INPUT", bam.bam.path,
        "--OUTPUT", metrics_file,
        "--REFERENCE_SEQUENCE", inputs.genome.output.fasta.path,
        "--READ_LENGTH", inputs.read_length,
        "--INCLUDE_BQ_HISTOGRAM", inputs.create_histogram,
        "--MINIMUM_MAPPING_QUALITY", options.min_map_quality,
        "--MINIMUM_BASE_QUALITY", options.min_quality,
        "--COVERAGE_CAP", options.coverage_cap,
        "--LOCUS_ACCUMULATION_CAP", options.accumulation_cap,
        "--COUNT_UNPAIRED", options.count_unpaired,
        "--SAMPLE_SIZE", options.sample_size,
        "--VALIDATION_STRINGENCY", options.validation_stringency,
    ]

    exit_code, _, _ = Cmd["gatk"]["CollectWgsMetrics"][metrics_args] & TEE(
        retcode=None)
    if exit_code:
        self.error("CollectWgsMetrics tool failed.")

    replace_metrics_class(metrics_file)

    outputs.report = metrics_file
    outputs.species = bam.species
    outputs.build = bam.build
def firewallrule(self, rulename):
    """Create a firewall rule allowing tcp:8089 unless it already exists.

    An empty ``rulename`` falls back to ``locustweb``. The existing rules
    are listed with ``gcloud compute firewall-rules list``; if any line of
    that listing contains the rule name, nothing is created.
    """
    rulename = rulename or "locustweb"

    # Equivalent of: gcloud compute firewall-rules list | grep <rulename>
    print(info | "existinng firewall rules")
    _, listing, _ = gcloud["compute", "firewall-rules", "list"] & TEE()
    if any(rulename in line for line in listing.splitlines()):
        print("Firewall rule already exists")
        return

    print(info | "creating firewall rule")
    create_rule = gcloud["compute", "firewall-rules", "create", rulename,
                         "--allow=tcp:8089"]
    create_rule()
    print(info | "firewall rule created")