Ejemplo n.º 1
0
def _add_reject_flag(in_file, config):

    """Add REJECT flag to all records that aren't flagged somatic
    (SS=2)"""

    Filter = namedtuple('Filter', ['id', 'desc'])
    reject_filter = Filter(id='REJECT',
                           desc='Rejected as non-SOMATIC or by quality')
    # NOTE: PyVCF will write an uncompressed VCF
    base, ext = utils.splitext_plus(in_file)
    name = "rejectfix"
    out_file = "{0}-{1}{2}".format(base, name, ".vcf")

    if utils.file_exists(in_file):
        reader = vcf.VCFReader(filename=in_file)
        # Add info to the header of the reader
        reader.filters["REJECT"] = reject_filter
        with file_transaction(out_file) as tx_out_file:
            with open(tx_out_file, "wb") as handle:
                writer = vcf.VCFWriter(handle, template=reader)
                for record in reader:
                    if "SS" in record.INFO:
                        # VarScan encodes it as a string
                        # TODO: Set it as integer when cleaning

                        if record.INFO["SS"] != "2":
                            record.add_filter("REJECT")
                    writer.write_record(record)

        # Re-compress the file
        out_file = bgzip_and_index(out_file, config)
        move_vcf(in_file, "{0}.orig".format(in_file))
        move_vcf(out_file, in_file)
        with open(out_file, "w") as out_handle:
            out_handle.write("Moved to {0}".format(in_file))
Ejemplo n.º 2
0
def _add_reject_flag(in_file, config):
    """Add REJECT flag to all records that aren't flagged somatic
    (SS=2)"""

    Filter = namedtuple('Filter', ['id', 'desc'])
    reject_filter = Filter(id='REJECT',
                           desc='Rejected as non-SOMATIC or by quality')
    # NOTE: PyVCF will write an uncompressed VCF
    base, ext = utils.splitext_plus(in_file)
    name = "rejectfix"
    out_file = "{0}-{1}{2}".format(base, name, ".vcf")

    if utils.file_exists(in_file):
        reader = vcf.VCFReader(filename=in_file)
        # Add info to the header of the reader
        reader.filters["REJECT"] = reject_filter
        with file_transaction(config, out_file) as tx_out_file:
            with open(tx_out_file, "wb") as handle:
                writer = vcf.VCFWriter(handle, template=reader)
                for record in reader:
                    if "SS" in record.INFO:
                        # VarScan encodes it as a string
                        # TODO: Set it as integer when cleaning

                        if record.INFO["SS"] != "2":
                            record.add_filter("REJECT")
                    writer.write_record(record)

        # Re-compress the file
        out_file = bgzip_and_index(out_file, config)
        move_vcf(in_file, "{0}.orig".format(in_file))
        move_vcf(out_file, in_file)
        with open(out_file, "w") as out_handle:
            out_handle.write("Moved to {0}".format(in_file))
Ejemplo n.º 3
0
def fix_somatic_calls(in_file, config):
    """Fix somatic variant output, standardize it to the SOMATIC flag.
    """
    if vcf is None:
        raise ImportError("Require PyVCF for manipulating cancer VCFs")

    # HACK: Needed to replicate the structure used by PyVCF
    Info = namedtuple('Info', ['id', 'num', 'type', 'desc'])
    somatic_info = Info(id='SOMATIC', num=0, type='Flag', desc='Somatic event')
    Filter = namedtuple('Filter', ['id', 'desc'])
    reject_filter = Filter(id='REJECT',
                           desc='Rejected as non-SOMATIC or by quality')
    # NOTE: PyVCF will write an uncompressed VCF
    base, ext = utils.splitext_plus(in_file)
    name = "somaticfix"
    out_file = "{0}-{1}{2}".format(base, name, ".vcf")

    if utils.file_exists(in_file):
        reader = vcf.VCFReader(filename=in_file)
        # Add info to the header of the reader
        reader.infos["SOMATIC"] = somatic_info
        reader.filters["REJECT"] = reject_filter
        for ext in [".gz", ".gz.tbi"]:
            if os.path.exists(out_file + ext):
                os.remove(out_file + ext)
        with file_transaction(config, out_file) as tx_out_file:
            with open(tx_out_file, "wb") as handle:
                writer = vcf.VCFWriter(handle, template=reader)
                for record in reader:
                    # Handle FreeBayes
                    is_somatic = False
                    if "VT" in record.INFO:
                        if record.INFO["VT"] == "somatic":
                            record.add_info("SOMATIC", True)
                            is_somatic = True
                        # Discard old record
                        del record.INFO["VT"]
                    if not is_somatic:
                        record.add_filter("REJECT")
                    writer.write_record(record)

        # Re-compress the file
        out_file = bgzip_and_index(out_file, config)
        move_vcf(in_file, "{0}.orig".format(in_file))
        move_vcf(out_file, in_file)
        with open(out_file, "w") as out_handle:
            out_handle.write("Moved to {0}".format(in_file))
Ejemplo n.º 4
0
def fix_somatic_calls(in_file, config):
    """Fix somatic variant output, standardize it to the SOMATIC flag.
    """
    if vcf is None:
        raise ImportError("Require PyVCF for manipulating cancer VCFs")

    # HACK: Needed to replicate the structure used by PyVCF
    Info = namedtuple('Info', ['id', 'num', 'type', 'desc'])
    somatic_info = Info(id='SOMATIC', num=0, type='Flag', desc='Somatic event')
    Filter = namedtuple('Filter', ['id', 'desc'])
    reject_filter = Filter(id='REJECT', desc='Rejected as non-SOMATIC or by quality')
    # NOTE: PyVCF will write an uncompressed VCF
    base, ext = utils.splitext_plus(in_file)
    name = "somaticfix"
    out_file = "{0}-{1}{2}".format(base, name, ".vcf")

    if utils.file_exists(in_file):
        reader = vcf.VCFReader(filename=in_file)
        # Add info to the header of the reader
        reader.infos["SOMATIC"] = somatic_info
        reader.filters["REJECT"] = reject_filter
        for ext in [".gz", ".gz.tbi"]:
            if os.path.exists(out_file + ext):
                os.remove(out_file + ext)
        with file_transaction(config, out_file) as tx_out_file:
            with open(tx_out_file, "wb") as handle:
                writer = vcf.VCFWriter(handle, template=reader)
                for record in reader:
                    # Handle FreeBayes
                    is_somatic = False
                    if "VT" in record.INFO:
                        if record.INFO["VT"] == "somatic":
                            record.add_info("SOMATIC", True)
                            is_somatic = True
                        # Discard old record
                        del record.INFO["VT"]
                    if not is_somatic:
                        record.add_filter("REJECT")
                    writer.write_record(record)

        # Re-compress the file
        out_file = bgzip_and_index(out_file, config)
        move_vcf(in_file, "{0}.orig".format(in_file))
        move_vcf(out_file, in_file)
        with open(out_file, "w") as out_handle:
            out_handle.write("Moved to {0}".format(in_file))
Ejemplo n.º 5
0
def clean_vcf_output(orig_file, clean_fn, config, name="clean"):
    """Provide framework to clean a file in-place, with the specified clean
    function.
    """
    base, ext = utils.splitext_plus(orig_file)
    out_file = "{0}-{1}{2}".format(base, name, ext)
    if not utils.file_exists(out_file):
        with open(orig_file) as in_handle:
            with file_transaction(config, out_file) as tx_out_file:
                with open(tx_out_file, "w") as out_handle:
                    for line in in_handle:
                        update_line = clean_fn(line)
                        if update_line:
                            out_handle.write(update_line)
        move_vcf(orig_file, "{0}.orig".format(orig_file))
        move_vcf(out_file, orig_file)
        with open(out_file, "w") as out_handle:
            out_handle.write("Moved to {0}".format(orig_file))
Ejemplo n.º 6
0
def clean_vcf_output(orig_file, clean_fn, config, name="clean"):
    """Provide framework to clean a file in-place, with the specified clean
    function.
    """
    base, ext = utils.splitext_plus(orig_file)
    out_file = "{0}-{1}{2}".format(base, name, ext)
    if not utils.file_exists(out_file):
        with open(orig_file) as in_handle:
            with file_transaction(config, out_file) as tx_out_file:
                with open(tx_out_file, "w") as out_handle:
                    for line in in_handle:
                        update_line = clean_fn(line)
                        if update_line:
                            out_handle.write(update_line)
        move_vcf(orig_file, "{0}.orig".format(orig_file))
        move_vcf(out_file, orig_file)
        with open(out_file, "w") as out_handle:
            out_handle.write("Moved to {0}".format(orig_file))