Beispiel #1
0
def _run_qsignature_generator(bam_file, data, out_dir):
    """Run qsignature's SignatureGenerator to create the signature VCF.

    The resulting VCF is later used as input of qsignature_summary. Nothing
    is run unless the sample's ``mixup_check`` setting starts with
    ``"qsignature"``. With ``"qsignature_full"`` the whole BAM is used with a
    larger JVM heap and read limit; otherwise the BAM is first reduced with
    ``_slice_chr22``. The selected BAM goes through ``bam.downsample`` and
    falls back to the un-downsampled file when that returns nothing.

    :param bam_file: (str) path of the bam_file
    :param data: sample information; indexed by key in this function
                 (``data["config"]``, ``data["name"]``)
    :param out_dir: (str) path of the output directory

    :returns: (dict) ``{'qsig_vcf': path}`` with the signature VCF, or an
              empty dict when qsignature is not requested, no positions file
              is available, or the qsignature program is not configured
    :raises IOError: if SignatureGenerator did not leave the expected
                     ``<input>.qsig.vcf`` file behind
    """
    position = dd.get_qsig_file(data)
    mixup_check = dd.get_mixup_check(data)
    if not (mixup_check and mixup_check.startswith("qsignature")):
        return {}
    if not position:
        logger.info("There is no qsignature for this species: %s" %
                    tz.get_in(['genome_build'], data))
        return {}

    if mixup_check == "qsignature_full":
        slice_bam = bam_file
        jvm_opts = "-Xms750m -Xmx8g"
        limit_reads = 100000000
    else:
        slice_bam = _slice_chr22(bam_file, data)
        jvm_opts = "-Xms750m -Xmx2g"
        limit_reads = 20000000

    qsig = config_utils.get_program("qsignature", data["config"])
    if not qsig:
        return {}

    utils.safe_makedir(out_dir)
    # Swap only the file extension. A plain str.replace("bam", ...) would also
    # rewrite any "bam" that happens to be part of the sample name.
    bam_stem = os.path.splitext(os.path.basename(slice_bam))[0]
    out_file = os.path.join(out_dir, bam_stem + ".qsig.vcf")
    if os.path.exists(out_file):
        # Result from a previous run; reuse it.
        return {'qsig_vcf': out_file}

    log_file = os.path.join(out_dir, "qsig.log")
    cores = dd.get_cores(data)
    down_file = bam.downsample(slice_bam, data, limit_reads) or slice_bam
    cmd = ("{qsig} {jvm_opts} "
           "org.qcmg.sig.SignatureGenerator "
           "--noOfThreads {cores} "
           "-log {log_file} -i {position} "
           "-i {down_file} ").format(qsig=qsig, jvm_opts=jvm_opts,
                                     cores=cores, log_file=log_file,
                                     position=position, down_file=down_file)
    do.run(cmd, "qsignature vcf generation: %s" % data["name"][-1])

    # The generated VCF is expected next to the BAM that was passed in.
    file_qsign_out = "{0}.qsig.vcf".format(down_file)
    if not os.path.exists(file_qsign_out):
        raise IOError("File doesn't exist %s" % file_qsign_out)
    with file_transaction(data, out_file) as file_txt_out:
        shutil.move(file_qsign_out, file_txt_out)
    return {'qsig_vcf': out_file}
def run(bam_file, data, out_dir):
    """Run qsignature's SignatureGenerator to create the signature VCF.

    The resulting VCF is later used as input of qsignature_summary. JVM
    options come from the ``qsignature`` resources (``jvm_opts``), defaulting
    to ``-Xms750m -Xmx8g``. Unless ``mixup_check`` is ``"qsignature_full"``,
    both the BAM and the positions file are first reduced with
    ``_slice_bam_chr21`` / ``_slice_vcf_chr21``.

    :param bam_file: (str) path of the bam_file
    :param data: sample information; indexed by key in this function
                 (``data["config"]``)
    :param out_dir: (str) path of the output directory

    :returns: (str) path of the signature VCF, or None when the qsignature
              tool is missing, qsignature is not requested via
              ``mixup_check``, or no positions file is available
    :raises IOError: if SignatureGenerator did not leave the expected
                     ``<input>.qsig.vcf`` file behind
    """
    qsig = config_utils.get_program("qsignature", data["config"])
    res_qsig = config_utils.get_resources("qsignature", data["config"])
    jvm_opts = " ".join(res_qsig.get("jvm_opts", ["-Xms750m", "-Xmx8g"]))
    if not qsig:
        logger.info("There is no qsignature tool. Skipping...")
        return None

    position = dd.get_qsig_file(data)
    mixup_check = dd.get_mixup_check(data)
    if not (mixup_check and mixup_check.startswith("qsignature")):
        return None

    utils.safe_makedir(out_dir)
    if not position:
        logger.info("There is no qsignature for this species: %s" %
                    tz.get_in(['genome_build'], data))
        return None

    if mixup_check == "qsignature_full":
        down_bam = bam_file
    else:
        down_bam = _slice_bam_chr21(bam_file, data)
        position = _slice_vcf_chr21(position, out_dir)

    # Swap only the file extension. A plain str.replace("bam", ...) would also
    # rewrite any "bam" that happens to be part of the sample name.
    bam_stem = os.path.splitext(os.path.basename(down_bam))[0]
    out_file = os.path.join(out_dir, bam_stem + ".qsig.vcf")
    if os.path.exists(out_file):
        # Result from a previous run; reuse it.
        return out_file

    log_file = os.path.join(out_dir, "qsig.log")
    cores = dd.get_cores(data)
    cmd = ("{qsig} {jvm_opts} "
           "org.qcmg.sig.SignatureGenerator "
           "--noOfThreads {cores} "
           "-log {log_file} -i {position} "
           "-i {down_bam} ").format(qsig=qsig, jvm_opts=jvm_opts,
                                    cores=cores, log_file=log_file,
                                    position=position, down_bam=down_bam)
    do.run(cmd, "qsignature vcf generation: %s" % dd.get_sample_name(data))

    # The generated VCF is expected next to the BAM that was passed in.
    file_qsign_out = "{0}.qsig.vcf".format(down_bam)
    if not os.path.exists(file_qsign_out):
        raise IOError("File doesn't exist %s" % file_qsign_out)
    with file_transaction(data, out_file) as file_txt_out:
        shutil.move(file_qsign_out, file_txt_out)
    return out_file
def _run_qsignature_generator(bam_file, data, out_dir):
    """Generate the qsignature VCF for one sample with SignatureGenerator.

    The VCF is later consumed by qsignature_summary. The step only runs when
    the sample's ``mixup_check`` value starts with ``"qsignature"``:

    * ``"qsignature_full"``: the full BAM, ``-Xmx8g`` and a limit of
      100000000 reads for ``bam.downsample``.
    * anything else: the BAM returned by ``_slice_chr22``, ``-Xmx2g`` and a
      limit of 20000000 reads.

    :param bam_file: (str) path of the bam_file
    :param data: sample information; indexed by key in this function
                 (``data["config"]``, ``data["name"]``)
    :param out_dir: (str) path of the output directory

    :returns: (dict) ``{'qsig_vcf': path}``; an empty dict when the step is
              skipped (not requested, no positions file, or no qsignature
              program configured)
    :raises IOError: if the VCF expected from SignatureGenerator is missing
    """
    position = dd.get_qsig_file(data)
    mixup_check = dd.get_mixup_check(data)
    requested = bool(mixup_check) and mixup_check.startswith("qsignature")
    if not requested:
        return {}
    if not position:
        logger.info("There is no qsignature for this species: %s"
                    % tz.get_in(['genome_build'], data))
        return {}

    full_mode = mixup_check == "qsignature_full"
    jvm_opts = "-Xms750m -Xmx8g" if full_mode else "-Xms750m -Xmx2g"
    limit_reads = 100000000 if full_mode else 20000000
    slice_bam = bam_file if full_mode else _slice_chr22(bam_file, data)

    qsig = config_utils.get_program("qsignature", data["config"])
    if not qsig:
        return {}

    utils.safe_makedir(out_dir)
    # Only the extension is replaced; str.replace("bam", ...) would corrupt
    # file names that contain "bam" anywhere else (e.g. in the sample name).
    base_name, _ = os.path.splitext(os.path.basename(slice_bam))
    out_file = os.path.join(out_dir, "%s.qsig.vcf" % base_name)
    log_file = os.path.join(out_dir, "qsig.log")
    cores = dd.get_cores(data)

    if not os.path.exists(out_file):
        down_file = bam.downsample(slice_bam, data, limit_reads)
        if not down_file:
            # No downsampled file was returned; run on the input as is.
            down_file = slice_bam
        cmd = " ".join([qsig, jvm_opts,
                        "org.qcmg.sig.SignatureGenerator",
                        "--noOfThreads", str(cores),
                        "-log", log_file,
                        "-i", position,
                        "-i", down_file]) + " "
        do.run(cmd, "qsignature vcf generation: %s" % data["name"][-1])
        # The generated VCF is expected next to the BAM that was passed in.
        file_qsign_out = "{0}.qsig.vcf".format(down_file)
        if not os.path.exists(file_qsign_out):
            raise IOError("File doesn't exist %s" % file_qsign_out)
        with file_transaction(data, out_file) as file_txt_out:
            shutil.move(file_qsign_out, file_txt_out)
    return {'qsig_vcf': out_file}
Beispiel #4
0
def run(bam_file, data, out_dir):
    """Generate the qsignature VCF for one sample with SignatureGenerator.

    The VCF is later consumed by qsignature_summary. JVM options are read
    from the ``jvm_opts`` entry of the ``qsignature`` resources and default
    to ``-Xms750m -Xmx8g``. The step only runs when ``mixup_check`` starts
    with ``"qsignature"``; unless it is exactly ``"qsignature_full"``, the BAM
    and the positions file are replaced by the outputs of
    ``_slice_bam_chr21`` and ``_slice_vcf_chr21``.

    :param bam_file: (str) path of the bam_file
    :param data: sample information; indexed by key in this function
                 (``data["config"]``)
    :param out_dir: (str) path of the output directory

    :returns: (str) path of the signature VCF; None when the step is skipped
              (tool not configured, not requested, or no positions file)
    :raises IOError: if the VCF expected from SignatureGenerator is missing
    """
    qsig = config_utils.get_program("qsignature", data["config"])
    res_qsig = config_utils.get_resources("qsignature", data["config"])
    jvm_opts = " ".join(res_qsig.get("jvm_opts", ["-Xms750m", "-Xmx8g"]))
    if not qsig:
        logger.info("There is no qsignature tool. Skipping...")
        return None

    position = dd.get_qsig_file(data)
    mixup_check = dd.get_mixup_check(data)
    requested = bool(mixup_check) and mixup_check.startswith("qsignature")
    if not requested:
        return None

    utils.safe_makedir(out_dir)
    if not position:
        logger.info("There is no qsignature for this species: %s"
                    % tz.get_in(['genome_build'], data))
        return None

    down_bam = bam_file
    if mixup_check != "qsignature_full":
        down_bam = _slice_bam_chr21(bam_file, data)
        position = _slice_vcf_chr21(position, out_dir)

    # Only the extension is replaced; str.replace("bam", ...) would corrupt
    # file names that contain "bam" anywhere else (e.g. in the sample name).
    base_name, _ = os.path.splitext(os.path.basename(down_bam))
    out_file = os.path.join(out_dir, "%s.qsig.vcf" % base_name)
    log_file = os.path.join(out_dir, "qsig.log")
    cores = dd.get_cores(data)

    if not os.path.exists(out_file):
        cmd = " ".join([qsig, jvm_opts,
                        "org.qcmg.sig.SignatureGenerator",
                        "--noOfThreads", str(cores),
                        "-log", log_file,
                        "-i", position,
                        "-i", down_bam]) + " "
        do.run(cmd, "qsignature vcf generation: %s" % dd.get_sample_name(data))
        # The generated VCF is expected next to the BAM that was passed in.
        file_qsign_out = "{0}.qsig.vcf".format(down_bam)
        if not os.path.exists(file_qsign_out):
            raise IOError("File doesn't exist %s" % file_qsign_out)
        with file_transaction(data, out_file) as file_txt_out:
            shutil.move(file_qsign_out, file_txt_out)
    return out_file
Beispiel #5
0
def _parse_qsignature_output(in_file, out_file, warning_file, data):
    """Parse the XML file produced by qsignature and flag close sample pairs.

    Every ``comparison`` element is written to ``out_file`` as a
    tab-separated ``sample1 sample2 score`` line. Pairs whose score falls
    under the thresholds below are logged and written to ``warning_file``:

    * score equal to ``same`` (0): reported as the same sample (error)
    * score below ``replicate``: reported as similar/replicate (warning)
    * score below ``related``: reported as relative samples (note)

    The thresholds are 0.1 / 0.18, or 0.01 / 0.061 when ``mixup_check`` is
    ``"qsignature_full"``.

    :param in_file: (str) with the path to the xml file
    :param out_file: (str) with the path to output file
    :param warning_file: (str) with the path to warning file
    :param data: sample information passed to ``dd.get_mixup_check`` and
                 ``file_transaction``

    :returns: (tuple) three sets of ``"sample1-sample2"`` strings:
              ``(error, warnings, similar)``
    """
    # ``import lxml`` alone does not load the ``etree`` submodule, so the
    # attribute access ``lxml.etree`` could fail; import it explicitly.
    import lxml.etree

    same, replicate, related = 0, 0.1, 0.18
    if dd.get_mixup_check(data) == "qsignature_full":
        same, replicate, related = 0, 0.01, 0.061

    # Parse first so a malformed input fails before any output is opened.
    with open(in_file, 'r') as in_handle:
        et = lxml.etree.parse(in_handle)

    # Map qsignature file ids to sample names (file name minus .qsig.vcf).
    name = {}
    for node in et.iter('file'):
        name[node.attrib['id']] = os.path.basename(
            node.attrib['name']).replace(".qsig.vcf", "")

    error, warnings, similar = set(), set(), set()
    with file_transaction(data, out_file) as out_tx_file, \
            file_transaction(data, warning_file) as warn_tx_file:
        with open(out_tx_file, 'w') as out_handle, \
                open(warn_tx_file, 'w') as warn_handle:
            for node in et.iter('comparison'):
                first = name[node.attrib['file1']]
                second = name[node.attrib['file2']]
                raw_score = node.attrib['score']
                pair = "-".join([first, second])
                out_handle.write("%s\t%s\t%s\n" % (first, second, raw_score))

                score = float(raw_score)
                msg = None
                if score == same:
                    msg = 'qsignature ERROR: read same samples:%s\n'
                    error.add(pair)
                elif score < replicate:
                    msg = 'qsignature WARNING: read similar/replicate samples:%s\n'
                    warnings.add(pair)
                elif score < related:
                    msg = 'qsignature NOTE: read relative samples:%s\n'
                    similar.add(pair)
                if msg:
                    logger.info(msg % pair)
                    warn_handle.write(msg % pair)
    return error, warnings, similar
def _parse_qsignature_output(in_file, out_file, warning_file, data):
    """ Read the qsignature XML report and classify every compared pair

    Each ``comparison`` element becomes one tab-separated line
    (sample1, sample2, score) in the output file. Pairs scoring exactly
    ``same``, below ``replicate`` or below ``related`` are logged and written
    to the warning file. The cut-offs are tighter when ``mixup_check`` is
    ``"qsignature_full"``.

    :param in_file: (str) with the path to the xml file
    :param out_file: (str) with the path to output file
    :param warning_file: (str) with the path to warning file
    :param data: sample information passed to ``dd.get_mixup_check`` and
                 ``file_transaction``

    :returns: (tuple) sets of "sample1-sample2" pairs: error, warnings, similar

    """
    sample_of = {}
    error, warnings, similar = set(), set(), set()
    if dd.get_mixup_check(data) == "qsignature_full":
        same, replicate, related = 0, 0.01, 0.061
    else:
        same, replicate, related = 0, 0.1, 0.18

    # Context managers are entered left to right, like the nested form.
    with open(in_file, 'r') as in_handle, \
            file_transaction(data, out_file) as out_tx_file, \
            file_transaction(data, warning_file) as warn_tx_file, \
            open(out_tx_file, 'w') as out_handle, \
            open(warn_tx_file, 'w') as warn_handle:
        tree = lxml.etree.parse(in_handle)

        # file id -> sample name (file name without the .qsig.vcf suffix)
        for file_node in tree.iter('file'):
            vcf_path = file_node.attrib['name']
            sample_of[file_node.attrib['id']] = os.path.basename(vcf_path).replace(".qsig.vcf", "")

        for cmp_node in tree.iter('comparison'):
            attrs = cmp_node.attrib
            first = sample_of[attrs['file1']]
            second = sample_of[attrs['file2']]
            pair = "-".join([first, second])
            out_handle.write("%s\t%s\t%s\n" % (first, second, attrs['score']))

            score = float(attrs['score'])
            if score == same:
                msg, bucket = 'qsignature ERROR: read same samples:%s\n', error
            elif score < replicate:
                msg, bucket = 'qsignature WARNING: read similar/replicate samples:%s\n', warnings
            elif score < related:
                msg, bucket = 'qsignature NOTE: read relative samples:%s\n', similar
            else:
                # Score is above every threshold: nothing to report.
                continue
            bucket.add(pair)
            text = msg % pair
            logger.info(text)
            warn_handle.write(text)
    return error, warnings, similar