Esempio n. 1
0
File: tophat.py Progetto: roryk/bipy
def _bcbio_tophat_wrapper(fastq_file, pair_file, ref_file,
                          stage_name, config):
    """Build a bcbio-style configuration and run ``tophat.align`` with it.

    Args:
        fastq_file: FASTQ file to align. Its basename, passed through
            ``remove_suffix``, is used as the output base name.
        pair_file: forwarded unchanged to ``tophat.align``.
        ref_file: forwarded unchanged to ``tophat.align``.
        stage_name: key into ``config["stage"]``; also the sub-directory of
            ``config["dir"]["results"]`` used as the alignment directory.
        config: pipeline configuration dictionary. Reads
            ``config["stage"][stage_name]`` (optional keys ``cores``,
            ``quality_format``, ``max_errors``, ``options``),
            ``config["program"]`` (optional ``tophat`` and ``bowtie``,
            required ``picard``), ``config["dir"]["results"]`` and the
            optional top-level ``gtf`` entry.

    Returns:
        The value returned by ``tophat.align``.

    Raises:
        ValueError: if a ``gtf`` entry is configured but ``file_exists``
            reports that it is not there.
        KeyError: if a required configuration key is missing.
    """
    bcbio_config = {}
    stage_config = config["stage"][stage_name]
    cores = stage_config.get("cores", 1)
    # use the listed quality format, if there isn't one, try to figure
    # out what format it is
    quality_format = stage_config.get("quality_format", None)
    if quality_format is None:
        fastq_format = fastqc.detect_fastq_format(fastq_file)
        quality_format = FASTQ_FORMAT_TO_BCBIO[fastq_format]

    max_errors = stage_config.get("max_errors", None)
    options = stage_config.get("options", {})
    # Fall back to the bare executable names when no location is configured.
    tophat_loc = config["program"].get("tophat", "tophat")
    bowtie_loc = config["program"].get("bowtie", "bowtie")
    out_base = remove_suffix(os.path.basename(fastq_file))
    align_dir = os.path.join(config["dir"]["results"], stage_name)

    bcbio_config["resources"] = {"tophat": {"cores": cores,
                                            "options": options}}
    bcbio_config["algorithm"] = {}
    bcbio_config["program"] = {}
    bcbio_config["algorithm"]["quality_format"] = quality_format
    bcbio_config["algorithm"]["max_errors"] = max_errors
    bcbio_config["gtf"] = config.get("gtf", None)
    # The GTF entry is optional, but a configured one must exist. Raising is
    # the only exit from this branch, so no explicit process exit follows it.
    if bcbio_config["gtf"] and not file_exists(bcbio_config["gtf"]):
        raise ValueError("GTF file does not exist. Please check to make sure that "
                         "the value of gtf is set corretly in the configuration file.")
    bcbio_config["program"]["tophat"] = tophat_loc
    bcbio_config["program"]["bowtie"] = bowtie_loc
    # Unlike tophat/bowtie, the picard entry has no default: a missing key
    # raises KeyError here.
    bcbio_config["program"]["picard"] = config["program"]["picard"]
    bcbio_config["program"]["gatk"] = {"dir": ""}

    out_file = tophat.align(fastq_file, pair_file, ref_file, out_base,
                            align_dir, bcbio_config)
    return out_file
Esempio n. 2
0
def _bcbio_tophat_wrapper(fastq_file, pair_file, ref_file,
                          stage_name, config):
    """Run ``tophat.align`` and expose its output as ``<out_base>.sam``.

    A bcbio-style configuration is assembled from ``config`` (core count from
    ``config["cluster"]``, quality format and error limit from the stage
    entry, program locations from ``config["program"]``, optional ``gtf``)
    and handed to ``tophat.align``. The path that call returns is deleted,
    and a symlink named ``<out_base>.sam`` with the relative target
    ``accepted_hits.sam`` is created in the same directory.

    Returns:
        Path of the newly created ``<out_base>.sam`` symlink.
    """
    stage_settings = config["stage"][stage_name]
    n_cores = config["cluster"].get("cores", None)

    # Prefer the configured quality format; detect it from the FASTQ file
    # only when the stage does not name one.
    qual_format = stage_settings.get("quality_format", None)
    if qual_format is None:
        detected = fastqc.detect_fastq_format(fastq_file)
        qual_format = FASTQ_FORMAT_TO_BCBIO[detected]

    error_limit = stage_settings.get("max_errors", None)
    programs = config["program"]
    tophat_cmd = programs.get("tophat", "tophat")
    bowtie_cmd = programs.get("bowtie", "bowtie")
    out_base = remove_suffix(os.path.basename(fastq_file))
    align_dir = os.path.join(config["dir"]["results"], stage_name)

    bcbio_settings = {
        "resources": {"tophat": {"cores": n_cores}},
        "algorithm": {
            "quality_format": qual_format,
            "max_errors": error_limit,
        },
        "program": {"tophat": tophat_cmd, "bowtie": bowtie_cmd},
        "gtf": config.get("gtf", None),
    }

    aligned = tophat.align(fastq_file, pair_file, ref_file, out_base,
                           align_dir, bcbio_settings)

    # Drop what the aligner handed back and put a relative link to
    # accepted_hits.sam in its directory under the sample's base name.
    os.remove(aligned)
    sam_link = os.path.join(os.path.dirname(aligned), out_base + ".sam")
    os.symlink("accepted_hits.sam", sam_link)

    return sam_link
Esempio n. 3
0
def _get_quality_type(in_file):
    """Return the quality-format flag for a FASTQ file.

    The candidate formats come from ``detect_fastq_format``. When several
    are reported, the first one is used. When none is reported, or the first
    one has no entry in ``_FASTQ_TYPE_TO_FLAG``, "sanger" is assumed.
    """
    detected_formats = detect_fastq_format(in_file)
    if not detected_formats:
        # Indexing an empty result would raise IndexError instead of
        # applying the documented "sanger" fallback.
        return "sanger"
    return _FASTQ_TYPE_TO_FLAG.get(detected_formats[0], "sanger")