def _add_reject_flag(in_file, config):
    """Add REJECT flag to all records that aren't flagged somatic (SS=2).

    Every record of ``in_file`` is copied to ``<base>-rejectfix.vcf``. A
    record whose INFO carries an ``SS`` value other than the string ``"2"``
    additionally receives the ``REJECT`` filter; records without ``SS`` are
    written unchanged. The rewritten file is passed to ``bgzip_and_index``,
    the original is moved to ``<in_file>.orig`` and the result is moved to
    ``in_file``. Nothing is done when ``utils.file_exists(in_file)`` is false.

    Args:
        in_file: Path of the VCF to rewrite in place.
        config: Configuration handed to ``file_transaction`` and
            ``bgzip_and_index``.
    """
    if not utils.file_exists(in_file):
        return

    # Header entry for the new filter; id/desc presumably mirror the record
    # PyVCF keeps for FILTER header lines -- TODO confirm against PyVCF.
    Filter = namedtuple('Filter', ['id', 'desc'])
    reject_filter = Filter(id='REJECT',
                           desc='Rejected as non-SOMATIC or by quality')
    # NOTE: PyVCF will write an uncompressed VCF
    base = utils.splitext_plus(in_file)[0]
    out_file = "{0}-{1}{2}".format(base, "rejectfix", ".vcf")

    reader = vcf.VCFReader(filename=in_file)
    # Add info to the header of the reader
    reader.filters["REJECT"] = reject_filter
    # config is passed first, matching the other file_transaction calls in
    # this module.
    with file_transaction(config, out_file) as tx_out_file:
        # Text mode: the PyVCF writer emits str rows, as in the PyVCF
        # documentation examples.
        with open(tx_out_file, "w") as handle:
            writer = vcf.VCFWriter(handle, template=reader)
            for record in reader:
                # VarScan encodes it as a string
                # TODO: Set it as integer when cleaning
                if "SS" in record.INFO and record.INFO["SS"] != "2":
                    record.add_filter("REJECT")
                writer.write_record(record)

    # Re-compress the file
    out_file = bgzip_and_index(out_file, config)
    move_vcf(in_file, "{0}.orig".format(in_file))
    move_vcf(out_file, in_file)
    # Leave a small text marker at the intermediate path saying where the
    # result went.
    with open(out_file, "w") as out_handle:
        out_handle.write("Moved to {0}".format(in_file))
def _add_reject_flag(in_file, config):
    """Add REJECT flag to all records that aren't flagged somatic (SS=2).

    Every record of ``in_file`` is copied to ``<base>-rejectfix.vcf``. A
    record whose INFO carries an ``SS`` value other than the string ``"2"``
    additionally receives the ``REJECT`` filter; records without ``SS`` are
    written unchanged. The rewritten file is passed to ``bgzip_and_index``,
    the original is moved to ``<in_file>.orig`` and the result is moved to
    ``in_file``. Nothing is done when ``utils.file_exists(in_file)`` is false.

    Args:
        in_file: Path of the VCF to rewrite in place.
        config: Configuration handed to ``file_transaction`` and
            ``bgzip_and_index``.
    """
    if not utils.file_exists(in_file):
        return

    # Header entry for the new filter; id/desc presumably mirror the record
    # PyVCF keeps for FILTER header lines -- TODO confirm against PyVCF.
    Filter = namedtuple('Filter', ['id', 'desc'])
    reject_filter = Filter(id='REJECT',
                           desc='Rejected as non-SOMATIC or by quality')
    # NOTE: PyVCF will write an uncompressed VCF
    base = utils.splitext_plus(in_file)[0]
    out_file = "{0}-{1}{2}".format(base, "rejectfix", ".vcf")

    reader = vcf.VCFReader(filename=in_file)
    # Add info to the header of the reader
    reader.filters["REJECT"] = reject_filter
    with file_transaction(config, out_file) as tx_out_file:
        # Text mode: the PyVCF writer emits str rows, as in the PyVCF
        # documentation examples.
        with open(tx_out_file, "w") as handle:
            writer = vcf.VCFWriter(handle, template=reader)
            for record in reader:
                # VarScan encodes it as a string
                # TODO: Set it as integer when cleaning
                if "SS" in record.INFO and record.INFO["SS"] != "2":
                    record.add_filter("REJECT")
                writer.write_record(record)

    # Re-compress the file
    out_file = bgzip_and_index(out_file, config)
    move_vcf(in_file, "{0}.orig".format(in_file))
    move_vcf(out_file, in_file)
    # Leave a small text marker at the intermediate path saying where the
    # result went.
    with open(out_file, "w") as out_handle:
        out_handle.write("Moved to {0}".format(in_file))
def fix_somatic_calls(in_file, config):
    """Fix somatic variant output, standardize it to the SOMATIC flag.

    Every record of ``in_file`` is copied to ``<base>-somaticfix.vcf``. A
    record whose ``VT`` INFO value equals ``"somatic"`` gains the ``SOMATIC``
    INFO flag; every other record gets the ``REJECT`` filter. The ``VT`` tag
    is removed from each record that had it. The rewritten file is passed to
    ``bgzip_and_index``, the original is moved to ``<in_file>.orig`` and the
    result is moved to ``in_file``. Nothing is rewritten when
    ``utils.file_exists(in_file)`` is false.

    Args:
        in_file: Path of the VCF to rewrite in place.
        config: Configuration handed to ``file_transaction`` and
            ``bgzip_and_index``.

    Raises:
        ImportError: If the module-level ``vcf`` name is ``None``.
    """
    if vcf is None:
        raise ImportError("Require PyVCF for manipulating cancer VCFs")
    if not utils.file_exists(in_file):
        return

    # HACK: Needed to replicate the structure used by PyVCF
    Info = namedtuple('Info', ['id', 'num', 'type', 'desc'])
    somatic_info = Info(id='SOMATIC', num=0, type='Flag', desc='Somatic event')
    Filter = namedtuple('Filter', ['id', 'desc'])
    reject_filter = Filter(id='REJECT',
                           desc='Rejected as non-SOMATIC or by quality')
    # NOTE: PyVCF will write an uncompressed VCF
    base = utils.splitext_plus(in_file)[0]
    out_file = "{0}-{1}{2}".format(base, "somaticfix", ".vcf")

    reader = vcf.VCFReader(filename=in_file)
    # Add info to the header of the reader
    reader.infos["SOMATIC"] = somatic_info
    reader.filters["REJECT"] = reject_filter

    # Remove any compressed output and index already sitting next to
    # out_file (presumably left by an earlier run -- confirm).
    for suffix in (".gz", ".gz.tbi"):
        stale = out_file + suffix
        if os.path.exists(stale):
            os.remove(stale)

    with file_transaction(config, out_file) as tx_out_file:
        # Text mode: the PyVCF writer emits str rows, as in the PyVCF
        # documentation examples.
        with open(tx_out_file, "w") as handle:
            writer = vcf.VCFWriter(handle, template=reader)
            for record in reader:
                # Handle FreeBayes
                if record.INFO.get("VT") == "somatic":
                    record.add_info("SOMATIC", True)
                else:
                    record.add_filter("REJECT")
                # Discard old record
                record.INFO.pop("VT", None)
                writer.write_record(record)

    # Re-compress the file
    out_file = bgzip_and_index(out_file, config)
    move_vcf(in_file, "{0}.orig".format(in_file))
    move_vcf(out_file, in_file)
    # Leave a small text marker at the intermediate path saying where the
    # result went.
    with open(out_file, "w") as out_handle:
        out_handle.write("Moved to {0}".format(in_file))
def clean_vcf_output(orig_file, clean_fn, config, name="clean"):
    """Rewrite a file in place, filtering its lines through ``clean_fn``.

    ``clean_fn`` is called with each line of ``orig_file``. A truthy return
    value is written to the output; a falsy one drops the line. Afterwards
    the original is moved to ``<orig_file>.orig``, the cleaned file is moved
    to ``orig_file``, and a short text marker is written at the intermediate
    ``<base>-<name><ext>`` path. If ``utils.file_exists`` reports that
    intermediate path as already present, the call does nothing.

    Args:
        orig_file: Path of the file to clean in place.
        clean_fn: Callable taking one line and returning the text to write,
            or a falsy value to skip the line.
        config: Configuration handed to ``file_transaction``.
        name: Tag inserted into the intermediate file name.
    """
    stem, suffix = utils.splitext_plus(orig_file)
    out_file = "{0}-{1}{2}".format(stem, name, suffix)
    if utils.file_exists(out_file):
        # The intermediate path is already there: skip the rewrite.
        return

    with open(orig_file) as in_handle:
        with file_transaction(config, out_file) as tx_out_file:
            with open(tx_out_file, "w") as out_handle:
                cleaned = (clean_fn(line) for line in in_handle)
                out_handle.writelines(text for text in cleaned if text)

    backup_file = "{0}.orig".format(orig_file)
    move_vcf(orig_file, backup_file)
    move_vcf(out_file, orig_file)
    marker_text = "Moved to {0}".format(orig_file)
    with open(out_file, "w") as marker_handle:
        marker_handle.write(marker_text)