def _run_qsignature_generator(bam_file, data, out_dir):
    """Run SignatureGenerator to create the normalized vcf that is later
    the input of qsignature_summary.

    Nothing is run unless the sample's mixup check starts with "qsignature".
    With "qsignature_full" the input BAM is used without slicing and a larger
    JVM heap and read limit are applied; otherwise the BAM is first passed
    through ``_slice_chr22``. The (possibly sliced) BAM is then downsampled
    to the read limit before the tool is called.

    :param bam_file: (str) path of the bam_file
    :param data: (dict) sample information; ``data["config"]`` and
        ``data["name"]`` are read here
    :param out_dir: (str) path of the output
    :returns: (dict) ``{'qsig_vcf': <path>}`` with the normalized vcf file,
        or an empty dict when the check is not requested, no qsignature
        positions file is available, or the qsignature program is not found
    :raises IOError: if the command ran but the expected vcf was not created
    """
    position = dd.get_qsig_file(data)
    mixup_check = dd.get_mixup_check(data)
    if not (mixup_check and mixup_check.startswith("qsignature")):
        return {}
    if not position:
        logger.info("There is no qsignature for this species: %s"
                    % tz.get_in(['genome_build'], data))
        return {}
    # Look the program up before slicing so a missing tool costs no BAM work.
    qsig = config_utils.get_program("qsignature", data["config"])
    if not qsig:
        return {}
    if mixup_check == "qsignature_full":
        slice_bam = bam_file
        jvm_opts = "-Xms750m -Xmx8g"
        limit_reads = 100000000
    else:
        slice_bam = _slice_chr22(bam_file, data)
        jvm_opts = "-Xms750m -Xmx2g"
        limit_reads = 20000000
    utils.safe_makedir(out_dir)
    # Swap only the file extension: a plain str.replace("bam", ...) would also
    # rewrite any "bam" that happens to be part of the sample name.
    bam_stem = os.path.splitext(os.path.basename(slice_bam))[0]
    out_file = os.path.join(out_dir, bam_stem + ".qsig.vcf")
    if os.path.exists(out_file):
        # Result of a previous run; reuse it.
        return {'qsig_vcf': out_file}
    log_file = os.path.join(out_dir, "qsig.log")
    cores = dd.get_cores(data)
    # Fall back to the un-downsampled BAM when downsample returns nothing.
    down_file = bam.downsample(slice_bam, data, limit_reads) or slice_bam
    # The generated vcf is expected next to the input as <bam>.qsig.vcf.
    file_qsign_out = "{0}.qsig.vcf".format(down_file)
    cmd = ("{qsig} {jvm_opts} "
           "org.qcmg.sig.SignatureGenerator "
           "--noOfThreads {cores} "
           "-log {log_file} -i {position} "
           "-i {down_file} ").format(qsig=qsig, jvm_opts=jvm_opts,
                                     cores=cores, log_file=log_file,
                                     position=position, down_file=down_file)
    do.run(cmd, "qsignature vcf generation: %s" % data["name"][-1])
    if not os.path.exists(file_qsign_out):
        raise IOError("File doesn't exist %s" % file_qsign_out)
    with file_transaction(data, out_file) as file_txt_out:
        shutil.move(file_qsign_out, file_txt_out)
    return {'qsig_vcf': out_file}
def run(bam_file, data, out_dir):
    """Run SignatureGenerator to create the normalized vcf that is later
    the input of qsignature_summary.

    Nothing is run unless the sample's mixup check starts with "qsignature".
    With "qsignature_full" the input BAM and positions file are used as they
    are; otherwise both are first reduced with ``_slice_bam_chr21`` and
    ``_slice_vcf_chr21``. JVM options come from the "qsignature" resources,
    defaulting to ``-Xms750m -Xmx8g``.

    :param bam_file: (str) path of the bam_file
    :param data: (dict) sample information; ``data["config"]`` is read here
    :param out_dir: (str) path of the output
    :returns: (str) path of the output normalized vcf file, or None when the
        qsignature program is not found, the check is not requested, or no
        qsignature positions file is available
    :raises IOError: if the command ran but the expected vcf was not created
    """
    qsig = config_utils.get_program("qsignature", data["config"])
    res_qsig = config_utils.get_resources("qsignature", data["config"])
    jvm_opts = " ".join(res_qsig.get("jvm_opts", ["-Xms750m", "-Xmx8g"]))
    if not qsig:
        logger.info("There is no qsignature tool. Skipping...")
        return None
    position = dd.get_qsig_file(data)
    mixup_check = dd.get_mixup_check(data)
    if not (mixup_check and mixup_check.startswith("qsignature")):
        return None
    utils.safe_makedir(out_dir)
    if not position:
        logger.info("There is no qsignature for this species: %s"
                    % tz.get_in(['genome_build'], data))
        return None
    if mixup_check == "qsignature_full":
        down_bam = bam_file
    else:
        down_bam = _slice_bam_chr21(bam_file, data)
        position = _slice_vcf_chr21(position, out_dir)
    # Swap only the file extension: a plain str.replace("bam", ...) would also
    # rewrite any "bam" that happens to be part of the sample name.
    bam_stem = os.path.splitext(os.path.basename(down_bam))[0]
    out_file = os.path.join(out_dir, bam_stem + ".qsig.vcf")
    if os.path.exists(out_file):
        # Result of a previous run; reuse it.
        return out_file
    log_file = os.path.join(out_dir, "qsig.log")
    cores = dd.get_cores(data)
    # The generated vcf is expected next to the input as <bam>.qsig.vcf.
    file_qsign_out = "{0}.qsig.vcf".format(down_bam)
    cmd = ("{qsig} {jvm_opts} "
           "org.qcmg.sig.SignatureGenerator "
           "--noOfThreads {cores} "
           "-log {log_file} -i {position} "
           "-i {down_bam} ").format(qsig=qsig, jvm_opts=jvm_opts,
                                    cores=cores, log_file=log_file,
                                    position=position, down_bam=down_bam)
    do.run(cmd, "qsignature vcf generation: %s" % dd.get_sample_name(data))
    if not os.path.exists(file_qsign_out):
        raise IOError("File doesn't exist %s" % file_qsign_out)
    with file_transaction(data, out_file) as file_txt_out:
        shutil.move(file_qsign_out, file_txt_out)
    return out_file
def _parse_qsignature_output(in_file, out_file, warning_file, data):
    """Parse xml file produced by qsignature.

    Every ``comparison`` element is written to ``out_file`` as a
    tab-separated line (name of file1, name of file2, score). Pairs are then
    classified by score: equal to ``same`` is an error, below ``replicate``
    a warning, below ``related`` a note. The thresholds are 0 / 0.1 / 0.18,
    or 0 / 0.01 / 0.061 when the mixup check is "qsignature_full". Each
    classified pair is logged and written to ``warning_file``.

    :param in_file: (str) with the path to the xml file
    :param out_file: (str) with the path to output file
    :param warning_file: (str) with the path to warning file
    :param data: sample information, passed to ``dd.get_mixup_check`` and
        ``file_transaction``
    :returns: (tuple) three sets ``(error, warnings, similar)`` of
        "name1-name2" pairs that could be duplicated or related
    """
    # A bare ``import lxml`` does not load the ``etree`` submodule, so
    # ``lxml.etree`` could raise AttributeError; import it explicitly.
    import lxml.etree

    name = {}
    error, warnings, similar = set(), set(), set()
    same, replicate, related = 0, 0.1, 0.18
    if dd.get_mixup_check(data) == "qsignature_full":
        same, replicate, related = 0, 0.01, 0.061
    with open(in_file, 'r') as in_handle:
        with file_transaction(data, out_file) as out_tx_file:
            with file_transaction(data, warning_file) as warn_tx_file:
                with open(out_tx_file, 'w') as out_handle:
                    with open(warn_tx_file, 'w') as warn_handle:
                        et = lxml.etree.parse(in_handle)
                        # Map each file id to its base name without the
                        # ".qsig.vcf" suffix.
                        for entry in et.iter('file'):
                            name[entry.attrib['id']] = os.path.basename(
                                entry.attrib['name']).replace(".qsig.vcf", "")
                        for comp in et.iter('comparison'):
                            first = name[comp.attrib['file1']]
                            second = name[comp.attrib['file2']]
                            pair = "-".join([first, second])
                            out_handle.write(
                                "%s\t%s\t%s\n"
                                % (first, second, comp.attrib['score']))
                            # Convert once instead of once per branch.
                            score = float(comp.attrib['score'])
                            msg = None
                            if score == same:
                                msg = 'qsignature ERROR: read same samples:%s\n'
                                error.add(pair)
                            elif score < replicate:
                                msg = 'qsignature WARNING: read similar/replicate samples:%s\n'
                                warnings.add(pair)
                            elif score < related:
                                msg = 'qsignature NOTE: read relative samples:%s\n'
                                similar.add(pair)
                            if msg:
                                logger.info(msg % pair)
                                warn_handle.write(msg % pair)
    return error, warnings, similar
def _parse_qsignature_output(in_file, out_file, warning_file, data):
    """Summarize the pairwise comparisons reported in a qsignature xml file.

    One tab-separated line (name of file1, name of file2, score) is written
    to ``out_file`` per ``comparison`` element. A pair whose score equals
    ``same`` is an error, one below ``replicate`` a warning and one below
    ``related`` a note; each such pair is logged and written to
    ``warning_file``. Thresholds are 0 / 0.1 / 0.18, or 0 / 0.01 / 0.061 when
    the mixup check is "qsignature_full".

    :param in_file: (str) with the path to the xml file
    :param out_file: (str) with the path to output file
    :param warning_file: (str) with the path to warning file
    :param data: sample information, passed to ``dd.get_mixup_check`` and
        ``file_transaction``
    :returns: (tuple) three sets ``(error, warnings, similar)`` of
        "name1-name2" pairs that could be duplicated or related
    """
    if dd.get_mixup_check(data) == "qsignature_full":
        same, replicate, related = 0, 0.01, 0.061
    else:
        same, replicate, related = 0, 0.1, 0.18
    sample_of = {}
    error, warnings, similar = set(), set(), set()
    with open(in_file, 'r') as in_handle, \
            file_transaction(data, out_file) as out_tx_file, \
            file_transaction(data, warning_file) as warn_tx_file, \
            open(out_tx_file, 'w') as out_handle, \
            open(warn_tx_file, 'w') as warn_handle:
        tree = lxml.etree.parse(in_handle)
        # File id -> base name without the ".qsig.vcf" suffix.
        for node in tree.iter('file'):
            base = os.path.basename(node.attrib['name'])
            sample_of[node.attrib['id']] = base.replace(".qsig.vcf", "")
        for node in tree.iter('comparison'):
            left = sample_of[node.attrib['file1']]
            right = sample_of[node.attrib['file2']]
            pair = "-".join([left, right])
            raw_score = node.attrib['score']
            out_handle.write("%s\t%s\t%s\n" % (left, right, raw_score))
            score = float(raw_score)
            if score == same:
                msg = 'qsignature ERROR: read same samples:%s\n'
                error.add(pair)
            elif score < replicate:
                msg = 'qsignature WARNING: read similar/replicate samples:%s\n'
                warnings.add(pair)
            elif score < related:
                msg = 'qsignature NOTE: read relative samples:%s\n'
                similar.add(pair)
            else:
                # Unrelated pair: nothing to report.
                continue
            text = msg % pair
            logger.info(text)
            warn_handle.write(text)
    return error, warnings, similar