예제 #1
0
    def _create_vcf_readers(pair_tuples):
        vcf_readers = []
        for vcf_file_reader, hc_file_reader in pair_tuples:
            if vcf_file_reader and hc_file_reader:
                vcf_reader = _VarscanVcfReader(vcf.VcfReader(vcf_file_reader),
                                               hc_file_reader)
            elif vcf_file_reader and not hc_file_reader:
                vcf_reader = _VarscanVcfReader(vcf.VcfReader(vcf_file_reader))
            vcf_readers.append(vcf_reader)

        return vcf_readers
예제 #2
0
    def _create_vcf_readers(prefix_to_readers):
        vcf_readers = []
        for file_reader in prefix_to_readers.values():
            vcf_reader = vcf.VcfReader(file_reader)
            vcf_readers.append(_StrelkaVcfReader(vcf_reader))

        return vcf_readers
예제 #3
0
def execute(args, execution_context):
    """Runs the two-pass summarize/zscore pipeline from args.input to args.output.

    The first pass hands the input VCF and a SummarizeCaller to
    _write_to_tmp_file, targeting "<output>.tmp". The second pass re-reads that
    temporary file, builds a ZScoreCaller from it and hands both to
    _write_zscores, targeting the final output. The temporary file is removed
    afterwards, whether or not the passes succeeded.

    Args:
        args: parsed arguments; reads ``input`` and ``output``.
        execution_context: metaheaders that are concatenated in front of the
            summary caller's metaheaders.
    """
    input_file = os.path.abspath(args.input)
    output = os.path.abspath(args.output)

    summary_caller = summarize_caller.SummarizeCaller()

    vcf_reader = vcf.VcfReader(vcf.FileReader(input_file))
    tmp_output_file = output + ".tmp"
    tmp_writer = vcf.FileWriter(tmp_output_file)

    try:
        _write_to_tmp_file(summary_caller, vcf_reader, tmp_writer)

        tmp_reader = vcf.VcfReader(vcf.FileReader(tmp_output_file))
        file_writer = vcf.FileWriter(output)

        logger.info("Calculating zscores")
        caller = zscore_caller.ZScoreCaller(tmp_reader)
        metaheaders = execution_context + summary_caller.get_metaheaders()
        _write_zscores(caller, metaheaders, tmp_reader, file_writer)
    finally:
        # Clean up the intermediate file even when a pass fails; the existence
        # check keeps a missing file from masking the original exception.
        if os.path.exists(tmp_output_file):
            os.remove(tmp_output_file)
예제 #4
0
def execute(args, dummy_execution_context):
    """Expands the input VCF into a tab-delimited file and writes a glossary.

    The output starts with a "#"-prefixed, tab-joined header of the selected
    columns, followed by one tab-joined row per VCF record; a column missing
    from a record's row dict is written as ".". After the rows are written, a
    glossary is produced through _get_glossary_writer/_create_glossary.

    Writers and the VCF reader are closed even if processing fails midway.

    Args:
        args: parsed arguments; reads ``input``, ``output``,
            ``original_output`` and ``selected_columns_file``.
        dummy_execution_context: unused.
    """
    #for the moment, there is no good place to put the execution context
    input_file = os.path.abspath(args.input)
    output_file = os.path.abspath(args.output)
    col_spec = args.selected_columns_file or None

    logger.debug("Expanding [{}] to [{}]", input_file, output_file)
    logger.info("Expanding [{}] to [{}]", args.input, args.original_output)

    vcf_reader = vcf.VcfReader(vcf.FileReader(input_file))
    file_writer = vcf.FileWriter(output_file)
    file_writer.open()
    try:
        (columns, glossary_fields) = _get_actual_columns(vcf_reader, col_spec)

        file_writer.write("#" + "\t".join(columns) + "\n")

        line_count = 0
        vcf_reader.open()
        try:
            for vcf_record in vcf_reader.vcf_records():
                row_dict = _create_row_dict(vcf_reader.split_column_header,
                                            vcf_record)

                # "." stands in for any selected column this record lacks.
                new_line = [row_dict[col] if col in row_dict else "."
                            for col in columns]

                file_writer.write("\t".join(new_line) + "\n")
                line_count += 1
                if line_count % 10000 == 0:
                    logger.info("Expanding: {} rows processed", line_count)
            logger.info("Expand complete: {} rows processed", line_count)
        finally:
            vcf_reader.close()
    finally:
        file_writer.close()

    glossary_writer = _get_glossary_writer(output_file)
    glossary_writer.open()
    try:
        _create_glossary(vcf_reader.metaheaders,
                         glossary_fields,
                         glossary_writer)
    finally:
        glossary_writer.close()
    logger.info("Wrote glossary to [{}]",
                os.path.basename(glossary_writer.output_filepath))

    logger.debug("Wrote input [{}] to output [{}]", input_file, output_file)
예제 #5
0
    def claim(self, file_readers):
        """Recognizes and claims MuTect VCFs from the set of all input VCFs.

        Every file reader that passes self._is_mutect_vcf is wrapped in a
        _MutectVcfReader; all others are handed back untouched so that other
        callers may evaluate them.

        Args:
            file_readers: the collection of currently unclaimed files

        Returns:
            A tuple of (unclaimed file readers, _MutectVcfReaders), each in
            input order.
        """
        claimed = []
        leftover = []
        for candidate in file_readers:
            if not self._is_mutect_vcf(candidate):
                leftover.append(candidate)
                continue
            claimed.append(_MutectVcfReader(vcf.VcfReader(candidate)))
        return (leftover, claimed)
예제 #6
0
def validate_args(args):
    """Validates the command's arguments before execution.

    Args:
        args: parsed arguments; reads ``selected_columns_file`` and ``input``.

    Raises:
        utils.UsageError: if selected_columns_file is given but is not a file
            or yields no columns, or if a VcfReader cannot be built from the
            input.
    """
    if args.selected_columns_file:
        if not os.path.isfile(args.selected_columns_file):
            raise utils.UsageError(("The selected_columns_file [{}] could "
                                    "not be read. Review inputs/usage and "
                                    "try again."), args.selected_columns_file)
        columns = None
        try:
            columns = _read_col_spec(args.selected_columns_file)
        except Exception:  # pylint: disable=broad-except
            # Best effort: any failure to read the spec is reported below as
            # an empty column list. Interrupts/exits still propagate.
            columns = None
        if not columns:
            raise utils.UsageError(
                ("The selected_columns_file [{}] has no rows. Review "
                 "inputs/usage and try again"
                ).format(args.selected_columns_file))

    try:
        vcf.VcfReader(vcf.FileReader(args.input))
    except Exception:  # pylint: disable=broad-except
        raise utils.UsageError(
            ("The expand command requires a VCF file as an "
             "input, but the specified input [{}] contains no VCF "
             "metaheaders. Review inputs and try again.").format(args.input))
예제 #7
0
 def _is_varscan_vcf(file_reader):
     if file_reader.file_name.endswith(".vcf"):
         vcf_reader = vcf.VcfReader(file_reader)
         return "##source=VarScan2" in vcf_reader.metaheaders
     return False
예제 #8
0
 def _is_mutect_vcf(file_reader):
     if not file_reader.file_name.lower().endswith(".vcf"):
         return False
     vcf_reader = vcf.VcfReader(file_reader)
     return _get_mutect_parser(vcf_reader.metaheaders) != None
예제 #9
0
 def _is_strelka_vcf(file_reader):
     if file_reader.file_name.endswith(".vcf"):
         vcf_reader = vcf.VcfReader(file_reader)
         return "##source=strelka" in vcf_reader.metaheaders
     return False