def _create_vcf_readers(pair_tuples):
    """Wrap each (vcf_file_reader, hc_file_reader) pair in a _VarscanVcfReader.

    Args:
        pair_tuples: iterable of 2-tuples (vcf_file_reader, hc_file_reader).
            hc_file_reader may be falsy, in which case the reader is built
            from the VCF alone.

    Returns:
        A list of _VarscanVcfReader objects, one per pair that has a
        vcf_file_reader, in input order.
    """
    vcf_readers = []
    for vcf_file_reader, hc_file_reader in pair_tuples:
        if not vcf_file_reader:
            # Without a VCF there is nothing to wrap. Previously this case
            # fell through both branches and appended an unbound (or stale,
            # left over from the prior iteration) vcf_reader.
            continue
        wrapped_vcf = vcf.VcfReader(vcf_file_reader)
        if hc_file_reader:
            vcf_reader = _VarscanVcfReader(wrapped_vcf, hc_file_reader)
        else:
            vcf_reader = _VarscanVcfReader(wrapped_vcf)
        vcf_readers.append(vcf_reader)
    return vcf_readers
def _create_vcf_readers(prefix_to_readers):
    """Build one _StrelkaVcfReader per file reader in the mapping.

    Args:
        prefix_to_readers: mapping whose values are file readers; the keys
            are not used here.

    Returns:
        A list of _StrelkaVcfReader objects, in the mapping's value order.
    """
    return [
        _StrelkaVcfReader(vcf.VcfReader(reader))
        for reader in prefix_to_readers.values()
    ]
def execute(args, execution_context):
    """Process args.input through a temp file and write z-scores to args.output.

    The input VCF is first handed to _write_to_tmp_file, which writes to
    "<output>.tmp". That intermediate file is then re-read, given to a
    ZScoreCaller, and written to the final output by _write_zscores. The
    intermediate file is removed afterwards, whether or not processing
    succeeded.

    Args:
        args: parsed arguments; args.input and args.output are read.
        execution_context: sequence that is concatenated (with +) in front of
            the summary caller's metaheaders to form the output metaheaders.
    """
    input_file = os.path.abspath(args.input)
    output = os.path.abspath(args.output)
    tmp_output_file = output + ".tmp"

    summary_caller = summarize_caller.SummarizeCaller()
    vcf_reader = vcf.VcfReader(vcf.FileReader(input_file))
    tmp_writer = vcf.FileWriter(tmp_output_file)

    try:
        _write_to_tmp_file(summary_caller, vcf_reader, tmp_writer)

        tmp_reader = vcf.VcfReader(vcf.FileReader(tmp_output_file))
        file_writer = vcf.FileWriter(output)

        logger.info("Calculating zscores")
        caller = zscore_caller.ZScoreCaller(tmp_reader)
        metaheaders = execution_context + summary_caller.get_metaheaders()
        _write_zscores(caller, metaheaders, tmp_reader, file_writer)
    finally:
        # Clean up the intermediate file even when a step above raised, so a
        # failed run does not leave "<output>.tmp" behind. The existence
        # check keeps a failure that happened before the file was created
        # from being masked by a second error from os.remove.
        if os.path.exists(tmp_output_file):
            os.remove(tmp_output_file)
def execute(args, dummy_execution_context):
    """Expand the VCF at args.input into a tab-delimited file at args.output.

    Writes a header line ("#" followed by the tab-joined column names), then
    one tab-joined row per VCF record, using "." for any column the record's
    row dict does not contain. Afterwards a glossary is written through the
    writer returned by _get_glossary_writer. All writers and the VCF reader
    are closed even if a step raises.

    Args:
        args: parsed arguments; reads args.input, args.output,
            args.original_output (for logging only) and
            args.selected_columns_file (optional column spec).
        dummy_execution_context: unused.
    """
    #for the moment, there is no good place to put the execution context
    input_file = os.path.abspath(args.input)
    output_file = os.path.abspath(args.output)
    col_spec = args.selected_columns_file if args.selected_columns_file else None

    logger.debug("Expanding [{}] to [{}]", input_file, output_file)
    logger.info("Expanding [{}] to [{}]", args.input, args.original_output)

    vcf_reader = vcf.VcfReader(vcf.FileReader(input_file))
    file_writer = vcf.FileWriter(output_file)

    file_writer.open()
    # Tracks whether vcf_reader.open() succeeded so the outer finally only
    # closes a reader that was actually opened.
    vcf_reader_opened = False
    try:
        try:
            (columns, glossary_fields) = _get_actual_columns(vcf_reader,
                                                             col_spec)
            file_writer.write("#" + "\t".join(columns) + "\n")

            line_count = 0
            vcf_reader.open()
            vcf_reader_opened = True
            for vcf_record in vcf_reader.vcf_records():
                row_dict = _create_row_dict(vcf_reader.split_column_header,
                                            vcf_record)
                # "." marks a column that this record has no value for.
                new_line = [row_dict[col] if col in row_dict else "."
                            for col in columns]
                file_writer.write("\t".join(new_line) + "\n")
                line_count += 1
                if line_count % 10000 == 0:
                    logger.info("Expanding: {} rows processed", line_count)
            logger.info("Expand complete: {} rows processed", line_count)
        finally:
            file_writer.close()

        glossary_writer = _get_glossary_writer(output_file)
        glossary_writer.open()
        try:
            _create_glossary(vcf_reader.metaheaders,
                             glossary_fields,
                             glossary_writer)
        finally:
            glossary_writer.close()
        logger.info("Wrote glossary to [{}]",
                    os.path.basename(glossary_writer.output_filepath))
    finally:
        # The reader is closed last, matching the original ordering on the
        # success path.
        if vcf_reader_opened:
            vcf_reader.close()

    logger.debug("Wrote input [{}] to output [{}]", input_file, output_file)
def claim(self, file_readers):
    """Recognizes and claims MuTect VCFs from the set of all input VCFs.

    Each defined caller has a chance to evaluate and claim all the incoming
    files as something that it can process.

    Args:
        file_readers: the collection of currently unclaimed files

    Returns:
        A tuple of unclaimed readers and MuTectVcfReaders.
    """
    claimed = []
    leftover = []
    for candidate in file_readers:
        if not self._is_mutect_vcf(candidate):
            # Not ours; hand it back for other callers to consider.
            leftover.append(candidate)
            continue
        claimed.append(_MutectVcfReader(vcf.VcfReader(candidate)))
    return (leftover, claimed)
def validate_args(args):
    """Validate the arguments of the expand command.

    Checks that args.selected_columns_file (when given) is an existing file
    from which _read_col_spec returns at least one entry, and that a
    vcf.VcfReader can be constructed from args.input.

    Args:
        args: parsed arguments; reads args.selected_columns_file and
            args.input.

    Raises:
        utils.UsageError: if the selected columns file is missing, yields no
            rows (or cannot be parsed), or the input cannot be read as a VCF.
    """
    if args.selected_columns_file:
        if not os.path.isfile(args.selected_columns_file):
            raise utils.UsageError(("The selected_columns_file [{}] could "
                                    "not be read. Review inputs/usage and "
                                    "try again."),
                                   args.selected_columns_file)

        try:
            columns = _read_col_spec(args.selected_columns_file)
        except Exception:
            # Deliberately best-effort: an unparseable spec is reported the
            # same way as an empty one, below. Catching Exception (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            columns = None

        if not columns:
            # The message previously contained a literal ".*" where the file
            # name belongs; report the actual file instead.
            raise utils.UsageError(
                ("The selected_columns_file [{}] has no rows. "
                 "Review inputs/usage and try again"
                ).format(args.selected_columns_file))

    try:
        # Constructed only to verify the input parses as a VCF; the reader
        # itself is discarded.
        vcf.VcfReader(vcf.FileReader(args.input))
    except Exception:
        raise utils.UsageError(
            ("The expand command requires a VCF file as an "
             "input, but the specified input [{}] contains no VCF "
             "metaheaders. Review inputs and try again.").format(args.input))
def _is_varscan_vcf(file_reader):
    """Return True if file_reader is a ".vcf" file declaring VarScan2.

    A file qualifies when its file_name ends with ".vcf" (case-sensitive)
    and "##source=VarScan2" is among the metaheaders of a vcf.VcfReader
    built on it.

    Args:
        file_reader: object exposing file_name, accepted by vcf.VcfReader.
    """
    if not file_reader.file_name.endswith(".vcf"):
        return False
    varscan_tag = "##source=VarScan2"
    return varscan_tag in vcf.VcfReader(file_reader).metaheaders
def _is_mutect_vcf(file_reader):
    """Return True if file_reader is a VCF for which a MuTect parser exists.

    A file qualifies when its file_name ends with ".vcf" (compared
    case-insensitively) and _get_mutect_parser returns something other than
    None for the metaheaders of a vcf.VcfReader built on it.

    Args:
        file_reader: object exposing file_name, accepted by vcf.VcfReader.
    """
    if not file_reader.file_name.lower().endswith(".vcf"):
        return False
    vcf_reader = vcf.VcfReader(file_reader)
    # Identity test: "!= None" would defer to any custom __ne__ on the parser.
    return _get_mutect_parser(vcf_reader.metaheaders) is not None
def _is_strelka_vcf(file_reader):
    """Return True if file_reader is a ".vcf" file declaring strelka.

    A file qualifies when its file_name ends with ".vcf" (case-sensitive)
    and "##source=strelka" is among the metaheaders of a vcf.VcfReader
    built on it.

    Args:
        file_reader: object exposing file_name, accepted by vcf.VcfReader.
    """
    has_vcf_extension = file_reader.file_name.endswith(".vcf")
    if not has_vcf_extension:
        return False
    metaheaders = vcf.VcfReader(file_reader).metaheaders
    return "##source=strelka" in metaheaders